LLVM 17.0.0rc
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
15#include "PPC.h"
16#include "PPCCCState.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/APSInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
59#include "llvm/IR/CallingConv.h"
60#include "llvm/IR/Constant.h"
61#include "llvm/IR/Constants.h"
62#include "llvm/IR/DataLayout.h"
63#include "llvm/IR/DebugLoc.h"
65#include "llvm/IR/Function.h"
66#include "llvm/IR/GlobalValue.h"
67#include "llvm/IR/IRBuilder.h"
69#include "llvm/IR/Intrinsics.h"
70#include "llvm/IR/IntrinsicsPowerPC.h"
71#include "llvm/IR/Module.h"
72#include "llvm/IR/Type.h"
73#include "llvm/IR/Use.h"
74#include "llvm/IR/Value.h"
75#include "llvm/MC/MCContext.h"
76#include "llvm/MC/MCExpr.h"
86#include "llvm/Support/Debug.h"
88#include "llvm/Support/Format.h"
94#include <algorithm>
95#include <cassert>
96#include <cstdint>
97#include <iterator>
98#include <list>
99#include <optional>
100#include <utility>
101#include <vector>
102
103using namespace llvm;
104
105#define DEBUG_TYPE "ppc-lowering"
106
107static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
108cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
109
110static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
111cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
114cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisableSCO("disable-ppc-sco",
117cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
118
119static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
120cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
121
122static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
123cl::desc("use absolute jump tables on ppc"), cl::Hidden);
124
125static cl::opt<bool>
126 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
127 cl::desc("disable vector permute decomposition"),
128 cl::init(true), cl::Hidden);
129
131 "disable-auto-paired-vec-st",
132 cl::desc("disable automatically generated 32byte paired vector stores"),
133 cl::init(true), cl::Hidden);
134
135STATISTIC(NumTailCalls, "Number of tail calls");
136STATISTIC(NumSiblingCalls, "Number of sibling calls");
138 "Number of shuffles lowered to a VPERM or XXPERM");
139STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
140
141static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
142
143static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
144
145static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
146
147// FIXME: Remove this once the bug has been fixed!
149
151 const PPCSubtarget &STI)
152 : TargetLowering(TM), Subtarget(STI) {
153 // Initialize map that relates the PPC addressing modes to the computed flags
154 // of a load/store instruction. The map is used to determine the optimal
155 // addressing mode when selecting load and stores.
156 initializeAddrModeMap();
157 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
158 // arguments are at least 4/8 bytes aligned.
159 bool isPPC64 = Subtarget.isPPC64();
160 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
161
162 // Set up the register classes.
163 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
164 if (!useSoftFloat()) {
165 if (hasSPE()) {
166 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
167 // EFPU2 APU only supports f32
168 if (!Subtarget.hasEFPU2())
169 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
170 } else {
171 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
172 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
173 }
174 }
175
176 // Match BITREVERSE to customized fast code sequence in the td file.
179
180 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
182
183 // Custom lower inline assembly to check for special registers.
186
187 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
188 for (MVT VT : MVT::integer_valuetypes()) {
191 }
192
193 if (Subtarget.isISA3_0()) {
194 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
195 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
196 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
197 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
198 } else {
199 // No extending loads from f16 or HW conversions back and forth.
200 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
203 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
206 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
207 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
208 }
209
210 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
211
212 // PowerPC has pre-inc load and store's.
223 if (!Subtarget.hasSPE()) {
228 }
229
230 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
231 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
232 for (MVT VT : ScalarIntVTs) {
237 }
238
239 if (Subtarget.useCRBits()) {
241
242 if (isPPC64 || Subtarget.hasFPCVT()) {
245 isPPC64 ? MVT::i64 : MVT::i32);
248 isPPC64 ? MVT::i64 : MVT::i32);
249
252 isPPC64 ? MVT::i64 : MVT::i32);
255 isPPC64 ? MVT::i64 : MVT::i32);
256
259 isPPC64 ? MVT::i64 : MVT::i32);
262 isPPC64 ? MVT::i64 : MVT::i32);
263
266 isPPC64 ? MVT::i64 : MVT::i32);
269 isPPC64 ? MVT::i64 : MVT::i32);
270 } else {
275 }
276
277 // PowerPC does not support direct load/store of condition registers.
280
281 // FIXME: Remove this once the ANDI glue bug is fixed:
282 if (ANDIGlueBug)
284
285 for (MVT VT : MVT::integer_valuetypes()) {
288 setTruncStoreAction(VT, MVT::i1, Expand);
289 }
290
291 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
292 }
293
294 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
295 // PPC (the libcall is not available).
300
301 // We do not currently implement these libm ops for PowerPC.
302 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
303 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
304 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
305 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
307 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
308
309 // PowerPC has no SREM/UREM instructions unless we are on P9
310 // On P9 we may use a hardware instruction to compute the remainder.
311 // When the result of both the remainder and the division is required it is
312 // more efficient to compute the remainder from the result of the division
313 // rather than use the remainder instruction. The instructions are legalized
314 // directly because the DivRemPairsPass performs the transformation at the IR
315 // level.
316 if (Subtarget.isISA3_0()) {
321 } else {
326 }
327
328 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
337
338 // Handle constrained floating-point operations of scalar.
339 // TODO: Handle SPE specific operation.
345
350
351 if (!Subtarget.hasSPE()) {
354 }
355
356 if (Subtarget.hasVSX()) {
359 }
360
361 if (Subtarget.hasFSQRT()) {
364 }
365
366 if (Subtarget.hasFPRND()) {
371
376 }
377
378 // We don't support sin/cos/sqrt/fmod/pow
389
390 // MASS transformation for LLVM intrinsics with replicating fast-math flag
391 // to be consistent to PPCGenScalarMASSEntries pass
392 if (TM.getOptLevel() == CodeGenOpt::Aggressive) {
405 }
406
407 if (Subtarget.hasSPE()) {
410 } else {
411 setOperationAction(ISD::FMA , MVT::f64, Legal);
412 setOperationAction(ISD::FMA , MVT::f32, Legal);
413 }
414
415 if (Subtarget.hasSPE())
416 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
417
419
420 // If we're enabling GP optimizations, use hardware square root
421 if (!Subtarget.hasFSQRT() &&
422 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
423 Subtarget.hasFRE()))
425
426 if (!Subtarget.hasFSQRT() &&
427 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
428 Subtarget.hasFRES()))
430
431 if (Subtarget.hasFCPSGN()) {
434 } else {
437 }
438
439 if (Subtarget.hasFPRND()) {
444
449 }
450
451 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
452 // instruction xxbrd to speed up scalar BSWAP64.
453 if (Subtarget.isISA3_1()) {
456 } else {
459 ISD::BSWAP, MVT::i64,
460 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
461 }
462
463 // CTPOP or CTTZ were introduced in P8/P9 respectively
464 if (Subtarget.isISA3_0()) {
465 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
466 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
467 } else {
468 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
469 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
470 }
471
472 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
475 } else {
478 }
479
480 // PowerPC does not have ROTR
483
484 if (!Subtarget.useCRBits()) {
485 // PowerPC does not have Select
490 }
491
492 // PowerPC wants to turn select_cc of FP into fsel when possible.
495
496 // PowerPC wants to optimize integer setcc a bit
497 if (!Subtarget.useCRBits())
499
500 if (Subtarget.hasFPU()) {
504
508 }
509
510 // PowerPC does not have BRCOND which requires SetCC
511 if (!Subtarget.useCRBits())
513
515
516 if (Subtarget.hasSPE()) {
517 // SPE has built-in conversions
524
525 // SPE supports signaling compare of f32/f64.
528 } else {
529 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
532
533 // PowerPC does not have [U|S]INT_TO_FP
538 }
539
540 if (Subtarget.hasDirectMove() && isPPC64) {
545 if (TM.Options.UnsafeFPMath) {
554 }
555 } else {
560 }
561
562 // We cannot sextinreg(i1). Expand to shifts.
564
565 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
566 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
567 // support continuation, user-level threading, and etc.. As a result, no
568 // other SjLj exception interfaces are implemented and please don't build
569 // your own exception handling based on them.
570 // LLVM/Clang supports zero-cost DWARF exception handling.
573
574 // We want to legalize GlobalAddress and ConstantPool nodes into the
575 // appropriate instructions to materialize the address.
586
587 // TRAP is legal.
588 setOperationAction(ISD::TRAP, MVT::Other, Legal);
589
590 // TRAMPOLINE is custom lowered.
593
594 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
596
597 if (Subtarget.is64BitELFABI()) {
598 // VAARG always uses double-word chunks, so promote anything smaller.
600 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
602 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
604 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
606 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
608 } else if (Subtarget.is32BitELFABI()) {
609 // VAARG is custom lowered with the 32-bit SVR4 ABI.
612 } else
614
615 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
616 if (Subtarget.is32BitELFABI())
618 else
620
621 // Use the default implementation.
622 setOperationAction(ISD::VAEND , MVT::Other, Expand);
631
632 // We want to custom lower some of our intrinsics.
638
639 // To handle counter-based loop conditions.
641
646
647 // Comparisons that require checking two conditions.
648 if (Subtarget.hasSPE()) {
653 }
666
669
670 if (Subtarget.has64BitSupport()) {
671 // They also have instructions for converting between i64 and fp.
680 // This is just the low 32 bits of a (signed) fp->i64 conversion.
681 // We cannot do this with Promote because i64 is not a legal type.
684
685 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
688 }
689 } else {
690 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
691 if (Subtarget.hasSPE()) {
694 } else {
697 }
698 }
699
700 // With the instructions enabled under FPCVT, we can do everything.
701 if (Subtarget.hasFPCVT()) {
702 if (Subtarget.has64BitSupport()) {
711 }
712
721 }
722
723 if (Subtarget.use64BitRegs()) {
724 // 64-bit PowerPC implementations can support i64 types directly
725 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
726 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
728 // 64-bit PowerPC wants to expand i128 shifts itself.
732 } else {
733 // 32-bit PowerPC wants to expand i64 shifts itself.
737 }
738
739 // PowerPC has better expansions for funnel shifts than the generic
740 // TargetLowering::expandFunnelShift.
741 if (Subtarget.has64BitSupport()) {
744 }
747
748 if (Subtarget.hasVSX()) {
753 }
754
755 if (Subtarget.hasAltivec()) {
756 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
761 }
762 // First set operation action for all vector types to expand. Then we
763 // will selectively turn on ones that can be effectively codegen'd.
765 // add/sub are legal for all supported vector VT's.
768
769 // For v2i64, these are only valid with P8Vector. This is corrected after
770 // the loop.
771 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
776 }
777 else {
782 }
783
784 if (Subtarget.hasVSX()) {
787 }
788
789 // Vector instructions introduced in P8
790 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
793 }
794 else {
797 }
798
799 // Vector instructions introduced in P9
800 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
802 else
804
805 // We promote all shuffles to v16i8.
807 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
808
809 // We promote all non-typed operations to v4i32.
811 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
813 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
815 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
817 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
819 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
822 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
824 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
825
826 // No other operations are legal.
865
871 }
872 }
874 if (!Subtarget.hasP8Vector()) {
875 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
876 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
877 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
878 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
879 }
880
881 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
882 // with merges, splats, etc.
884
885 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
886 // are cheap, so handle them before they get expanded to scalar.
892
893 setOperationAction(ISD::AND , MVT::v4i32, Legal);
894 setOperationAction(ISD::OR , MVT::v4i32, Legal);
895 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
896 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
898 Subtarget.useCRBits() ? Legal : Expand);
899 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
909 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
912
913 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
914 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
915 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
916 if (Subtarget.hasAltivec())
917 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
919 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
920 if (Subtarget.hasP8Altivec())
921 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
922
923 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
924 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
925 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
926 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
927
928 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
929 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
930
931 if (Subtarget.hasVSX()) {
932 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
933 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
935 }
936
937 if (Subtarget.hasP8Altivec())
938 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
939 else
940 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
941
942 if (Subtarget.isISA3_1()) {
943 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
944 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
945 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
946 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
947 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
948 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
949 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
950 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
951 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
952 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
953 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
954 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
955 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
956 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
957 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
958 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
959 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
960 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
961 }
962
963 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
964 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
965
968
973
974 // Altivec does not contain unordered floating-point compare instructions
975 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
977 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
979
980 if (Subtarget.hasVSX()) {
983 if (Subtarget.hasP8Vector()) {
986 }
987 if (Subtarget.hasDirectMove() && isPPC64) {
996 }
998
999 // The nearbyint variants are not allowed to raise the inexact exception
1000 // so we can only code-gen them with unsafe math.
1001 if (TM.Options.UnsafeFPMath) {
1004 }
1005
1006 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1007 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1008 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1010 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1011 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1014
1016 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1017 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1020
1021 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1022 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1023
1024 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1025 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1026
1027 // Share the Altivec comparison restrictions.
1028 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1029 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1030 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1031 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1032
1033 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1034 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1035
1037
1038 if (Subtarget.hasP8Vector())
1039 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1040
1041 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1042
1043 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1044 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1045 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1046
1047 if (Subtarget.hasP8Altivec()) {
1048 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1049 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1050 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1051
1052 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1053 // SRL, but not for SRA because of the instructions available:
1054 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1055 // doing
1056 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1057 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1058 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1059
1060 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1061 }
1062 else {
1063 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1064 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1065 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1066
1067 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1068
1069 // VSX v2i64 only supports non-arithmetic operations.
1070 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1071 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1072 }
1073
1074 if (Subtarget.isISA3_1())
1075 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1076 else
1077 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1078
1079 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1080 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1082 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1083
1085
1094
1095 // Custom handling for partial vectors of integers converted to
1096 // floating point. We already have optimal handling for v2i32 through
1097 // the DAG combine, so those aren't necessary.
1114
1115 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1116 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1117 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1118 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1121
1124
1125 // Handle constrained floating-point operations of vector.
1126 // The predictor is `hasVSX` because altivec instruction has
1127 // no exception but VSX vector instruction has.
1141
1155
1156 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1157 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1158
1159 for (MVT FPT : MVT::fp_valuetypes())
1161
1162 // Expand the SELECT to SELECT_CC
1164
1165 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1166 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1167
1168 // No implementation for these ops for PowerPC.
1169 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1170 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1171 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1173 setOperationAction(ISD::FREM, MVT::f128, Expand);
1174 }
1175
1176 if (Subtarget.hasP8Altivec()) {
1177 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1178 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1179 }
1180
1181 if (Subtarget.hasP9Vector()) {
1184
1185 // Test data class instructions store results in CR bits.
1186 if (Subtarget.useCRBits()) {
1190 }
1191
1192 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1193 // SRL, but not for SRA because of the instructions available:
1194 // VS{RL} and VS{RL}O.
1195 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1196 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1197 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1198
1199 setOperationAction(ISD::FADD, MVT::f128, Legal);
1200 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1201 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1202 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1204
1205 setOperationAction(ISD::FMA, MVT::f128, Legal);
1212
1214 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1216 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1219
1223
1224 // Handle constrained floating-point operations of fp128
1241 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1242 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1243 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1244 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1245 } else if (Subtarget.hasVSX()) {
1248
1249 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1250 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1251
1252 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1253 // fp_to_uint and int_to_fp.
1256
1257 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1258 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1259 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1260 setOperationAction(ISD::FABS, MVT::f128, Expand);
1262 setOperationAction(ISD::FMA, MVT::f128, Expand);
1264
1265 // Expand the fp_extend if the target type is fp128.
1268
1269 // Expand the fp_round if the source type is fp128.
1270 for (MVT VT : {MVT::f32, MVT::f64}) {
1273 }
1274
1279
1280 // Lower following f128 select_cc pattern:
1281 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1283
1284 // We need to handle f128 SELECT_CC with integer result type.
1286 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1287 }
1288
1289 if (Subtarget.hasP9Altivec()) {
1290 if (Subtarget.isISA3_1()) {
1295 } else {
1298 }
1306
1307 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1308 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1309 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1310 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1311 }
1312
1313 if (Subtarget.hasP10Vector()) {
1315 }
1316 }
1317
1318 if (Subtarget.pairedVectorMemops()) {
1319 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1320 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1321 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1322 }
1323 if (Subtarget.hasMMA()) {
1324 if (Subtarget.isISAFuture())
1325 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1326 else
1327 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1328 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1329 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1331 }
1332
1333 if (Subtarget.has64BitSupport())
1335
1336 if (Subtarget.isISA3_1())
1337 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1338
1339 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1340
1341 if (!isPPC64) {
1344 }
1345
1350 }
1351
1353
1354 if (Subtarget.hasAltivec()) {
1355 // Altivec instructions set fields to all zeros or all ones.
1357 }
1358
1359 setLibcallName(RTLIB::MULO_I128, nullptr);
1360 if (!isPPC64) {
1361 // These libcalls are not available in 32-bit.
1362 setLibcallName(RTLIB::SHL_I128, nullptr);
1363 setLibcallName(RTLIB::SRL_I128, nullptr);
1364 setLibcallName(RTLIB::SRA_I128, nullptr);
1365 setLibcallName(RTLIB::MUL_I128, nullptr);
1366 setLibcallName(RTLIB::MULO_I64, nullptr);
1367 }
1368
1371 else if (isPPC64)
1373 else
1375
1376 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1377
1378 // We have target-specific dag combine patterns for the following nodes:
1381 if (Subtarget.hasFPCVT())
1384 if (Subtarget.useCRBits())
1388
1390
1392
1393 if (Subtarget.useCRBits()) {
1395 }
1396
1397 setLibcallName(RTLIB::LOG_F128, "logf128");
1398 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1399 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1400 setLibcallName(RTLIB::EXP_F128, "expf128");
1401 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1402 setLibcallName(RTLIB::SIN_F128, "sinf128");
1403 setLibcallName(RTLIB::COS_F128, "cosf128");
1404 setLibcallName(RTLIB::POW_F128, "powf128");
1405 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1406 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1407 setLibcallName(RTLIB::REM_F128, "fmodf128");
1408 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1409 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1410 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1411 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1412 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1413 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1414 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1415 setLibcallName(RTLIB::RINT_F128, "rintf128");
1416 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1417 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1418 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1419 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1420
1421 if (Subtarget.isAIXABI()) {
1422 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1423 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1424 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1425 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1426 }
1427
1428 // With 32 condition bits, we don't need to sink (and duplicate) compares
1429 // aggressively in CodeGenPrep.
1430 if (Subtarget.useCRBits()) {
1433 }
1434
1436
1437 switch (Subtarget.getCPUDirective()) {
1438 default: break;
1439 case PPC::DIR_970:
1440 case PPC::DIR_A2:
1441 case PPC::DIR_E500:
1442 case PPC::DIR_E500mc:
1443 case PPC::DIR_E5500:
1444 case PPC::DIR_PWR4:
1445 case PPC::DIR_PWR5:
1446 case PPC::DIR_PWR5X:
1447 case PPC::DIR_PWR6:
1448 case PPC::DIR_PWR6X:
1449 case PPC::DIR_PWR7:
1450 case PPC::DIR_PWR8:
1451 case PPC::DIR_PWR9:
1452 case PPC::DIR_PWR10:
1456 break;
1457 }
1458
1459 if (Subtarget.enableMachineScheduler())
1461 else
1463
1465
1466 // The Freescale cores do better with aggressive inlining of memcpy and
1467 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1468 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1469 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1470 MaxStoresPerMemset = 32;
1472 MaxStoresPerMemcpy = 32;
1476 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1477 // The A2 also benefits from (very) aggressive inlining of memcpy and
1478 // friends. The overhead of a the function call, even when warm, can be
1479 // over one hundred cycles.
1480 MaxStoresPerMemset = 128;
1481 MaxStoresPerMemcpy = 128;
1482 MaxStoresPerMemmove = 128;
1483 MaxLoadsPerMemcmp = 128;
1484 } else {
1487 }
1488
1489 IsStrictFPEnabled = true;
1490
1491 // Let the subtarget (CPU) decide if a predictable select is more expensive
1492 // than the corresponding branch. This information is used in CGP to decide
1493 // when to convert selects into branches.
1495}
1496
1497// *********************************** NOTE ************************************
1498// For selecting load and store instructions, the addressing modes are defined
1499// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1500// patterns to match the load the store instructions.
1501//
1502// The TD definitions for the addressing modes correspond to their respective
1503// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1504// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1505// address mode flags of a particular node. Afterwards, the computed address
1506// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1507// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1508// accordingly, based on the preferred addressing mode.
1509//
1510// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1511// MemOpFlags contains all the possible flags that can be used to compute the
1512// optimal addressing mode for load and store instructions.
1513// AddrMode contains all the possible load and store addressing modes available
1514// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1515//
1516// When adding new load and store instructions, it is possible that new address
1517// flags may need to be added into MemOpFlags, and a new addressing mode will
1518// need to be added to AddrMode. An entry of the new addressing mode (consisting
1519// of the minimal and main distinguishing address flags for the new load/store
1520// instructions) will need to be added into initializeAddrModeMap() below.
1521// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1522// need to be updated to account for selecting the optimal addressing mode.
1523// *****************************************************************************
1524/// Initialize the map that relates the different addressing modes of the load
1525/// and store instructions to a set of flags. This ensures the load/store
1526/// instruction is correctly matched during instruction selection.
1527void PPCTargetLowering::initializeAddrModeMap() {
1528 AddrModesMap[PPC::AM_DForm] = {
1529 // LWZ, STW
1534 // LBZ, LHZ, STB, STH
1539 // LHA
1544 // LFS, LFD, STFS, STFD
1549 };
1550 AddrModesMap[PPC::AM_DSForm] = {
1551 // LWA
1555 // LD, STD
1559 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1563 };
1564 AddrModesMap[PPC::AM_DQForm] = {
1565 // LXV, STXV
1569 };
1570 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1572 // TODO: Add mapping for quadword load/store.
1573}
1574
1575/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1576/// the desired ByVal argument alignment.
1578 if (MaxAlign == MaxMaxAlign)
1579 return;
1580 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1581 if (MaxMaxAlign >= 32 &&
1582 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1583 MaxAlign = Align(32);
1584 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1585 MaxAlign < 16)
1586 MaxAlign = Align(16);
1587 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1589 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1590 if (EltAlign > MaxAlign)
1592 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1593 for (auto *EltTy : STy->elements()) {
1596 if (EltAlign > MaxAlign)
1598 if (MaxAlign == MaxMaxAlign)
1599 break;
1600 }
1601 }
1602}
1603
1604/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1605/// function arguments in the caller parameter area.
1607 const DataLayout &DL) const {
1608 // 16byte and wider vectors are passed on 16byte boundary.
1609 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1610 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1611 if (Subtarget.hasAltivec())
1612 getMaxByValAlign(Ty, Alignment, Align(16));
1613 return Alignment.value();
1614}
1615
1617 return Subtarget.useSoftFloat();
1618}
1619
1621 return Subtarget.hasSPE();
1622}
1623
1625 return VT.isScalarInteger();
1626}
1627
1628const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1629 switch ((PPCISD::NodeType)Opcode) {
1630 case PPCISD::FIRST_NUMBER: break;
1631 case PPCISD::FSEL: return "PPCISD::FSEL";
1632 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1633 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1634 case PPCISD::FCFID: return "PPCISD::FCFID";
1635 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1636 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1637 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1638 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1639 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1640 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1641 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1642 case PPCISD::FRE: return "PPCISD::FRE";
1643 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1644 case PPCISD::FTSQRT:
1645 return "PPCISD::FTSQRT";
1646 case PPCISD::FSQRT:
1647 return "PPCISD::FSQRT";
1648 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1649 case PPCISD::VPERM: return "PPCISD::VPERM";
1650 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1652 return "PPCISD::XXSPLTI_SP_TO_DP";
1654 return "PPCISD::XXSPLTI32DX";
1655 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1656 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1657 case PPCISD::XXPERM:
1658 return "PPCISD::XXPERM";
1659 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1660 case PPCISD::CMPB: return "PPCISD::CMPB";
1661 case PPCISD::Hi: return "PPCISD::Hi";
1662 case PPCISD::Lo: return "PPCISD::Lo";
1663 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1664 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1665 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1666 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1667 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1668 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1669 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1670 case PPCISD::SRL: return "PPCISD::SRL";
1671 case PPCISD::SRA: return "PPCISD::SRA";
1672 case PPCISD::SHL: return "PPCISD::SHL";
1673 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1674 case PPCISD::CALL: return "PPCISD::CALL";
1675 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1676 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1677 case PPCISD::CALL_RM:
1678 return "PPCISD::CALL_RM";
1680 return "PPCISD::CALL_NOP_RM";
1682 return "PPCISD::CALL_NOTOC_RM";
1683 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1684 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1685 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1686 case PPCISD::BCTRL_RM:
1687 return "PPCISD::BCTRL_RM";
1689 return "PPCISD::BCTRL_LOAD_TOC_RM";
1690 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1691 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1692 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1693 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1694 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1695 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1696 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1697 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1698 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1699 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1701 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1703 return "PPCISD::ANDI_rec_1_EQ_BIT";
1705 return "PPCISD::ANDI_rec_1_GT_BIT";
1706 case PPCISD::VCMP: return "PPCISD::VCMP";
1707 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1708 case PPCISD::LBRX: return "PPCISD::LBRX";
1709 case PPCISD::STBRX: return "PPCISD::STBRX";
1710 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1711 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1712 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1713 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1714 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1715 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1716 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1717 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1718 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1720 return "PPCISD::ST_VSR_SCAL_INT";
1721 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1722 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1723 case PPCISD::BDZ: return "PPCISD::BDZ";
1724 case PPCISD::MFFS: return "PPCISD::MFFS";
1725 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1726 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1727 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1728 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1729 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1730 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1731 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1732 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1733 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1734 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1735 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1736 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1737 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1738 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1739 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1740 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1741 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1742 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1743 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1744 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1745 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1747 return "PPCISD::PADDI_DTPREL";
1748 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1749 case PPCISD::SC: return "PPCISD::SC";
1750 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1751 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1752 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1753 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1754 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1755 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1756 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1757 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1758 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1759 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1760 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1761 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1763 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1765 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1766 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1767 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1768 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1769 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1770 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1771 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1772 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1773 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1775 return "PPCISD::STRICT_FADDRTZ";
1777 return "PPCISD::STRICT_FCTIDZ";
1779 return "PPCISD::STRICT_FCTIWZ";
1781 return "PPCISD::STRICT_FCTIDUZ";
1783 return "PPCISD::STRICT_FCTIWUZ";
1785 return "PPCISD::STRICT_FCFID";
1787 return "PPCISD::STRICT_FCFIDU";
1789 return "PPCISD::STRICT_FCFIDS";
1791 return "PPCISD::STRICT_FCFIDUS";
1792 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1793 case PPCISD::STORE_COND:
1794 return "PPCISD::STORE_COND";
1795 }
1796 return nullptr;
1797}
1798
1800 EVT VT) const {
1801 if (!VT.isVector())
1802 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1803
1805}
1806
1808 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1809 return true;
1810}
1811
1812//===----------------------------------------------------------------------===//
1813// Node matching predicates, for use by the tblgen matching code.
1814//===----------------------------------------------------------------------===//
1815
1816/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1819 return CFP->getValueAPF().isZero();
1820 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1821 // Maybe this has already been legalized into the constant pool?
1822 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1823 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1824 return CFP->getValueAPF().isZero();
1825 }
1826 return false;
1827}
1828
1829/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1830/// true if Op is undef or if it matches the specified value.
1831static bool isConstantOrUndef(int Op, int Val) {
1832 return Op < 0 || Op == Val;
1833}
1834
1835/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1836/// VPKUHUM instruction.
1837/// The ShuffleKind distinguishes between big-endian operations with
1838/// two different inputs (0), either-endian operations with two identical
1839/// inputs (1), and little-endian operations with two different inputs (2).
1840/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1842 SelectionDAG &DAG) {
1843 bool IsLE = DAG.getDataLayout().isLittleEndian();
1844 if (ShuffleKind == 0) {
1845 if (IsLE)
1846 return false;
1847 for (unsigned i = 0; i != 16; ++i)
1848 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1849 return false;
1850 } else if (ShuffleKind == 2) {
1851 if (!IsLE)
1852 return false;
1853 for (unsigned i = 0; i != 16; ++i)
1854 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1855 return false;
1856 } else if (ShuffleKind == 1) {
1857 unsigned j = IsLE ? 0 : 1;
1858 for (unsigned i = 0; i != 8; ++i)
1859 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1860 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1861 return false;
1862 }
1863 return true;
1864}
1865
1866/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1867/// VPKUWUM instruction.
1868/// The ShuffleKind distinguishes between big-endian operations with
1869/// two different inputs (0), either-endian operations with two identical
1870/// inputs (1), and little-endian operations with two different inputs (2).
1871/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1873 SelectionDAG &DAG) {
1874 bool IsLE = DAG.getDataLayout().isLittleEndian();
1875 if (ShuffleKind == 0) {
1876 if (IsLE)
1877 return false;
1878 for (unsigned i = 0; i != 16; i += 2)
1879 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1880 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1881 return false;
1882 } else if (ShuffleKind == 2) {
1883 if (!IsLE)
1884 return false;
1885 for (unsigned i = 0; i != 16; i += 2)
1886 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1887 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1888 return false;
1889 } else if (ShuffleKind == 1) {
1890 unsigned j = IsLE ? 0 : 2;
1891 for (unsigned i = 0; i != 8; i += 2)
1892 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1893 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1894 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1895 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1896 return false;
1897 }
1898 return true;
1899}
1900
1901/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1902/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1903/// current subtarget.
1904///
1905/// The ShuffleKind distinguishes between big-endian operations with
1906/// two different inputs (0), either-endian operations with two identical
1907/// inputs (1), and little-endian operations with two different inputs (2).
1908/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1910 SelectionDAG &DAG) {
1911 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1912 if (!Subtarget.hasP8Vector())
1913 return false;
1914
1915 bool IsLE = DAG.getDataLayout().isLittleEndian();
1916 if (ShuffleKind == 0) {
1917 if (IsLE)
1918 return false;
1919 for (unsigned i = 0; i != 16; i += 4)
1920 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1921 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1922 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1923 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1924 return false;
1925 } else if (ShuffleKind == 2) {
1926 if (!IsLE)
1927 return false;
1928 for (unsigned i = 0; i != 16; i += 4)
1929 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1930 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1931 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1932 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1933 return false;
1934 } else if (ShuffleKind == 1) {
1935 unsigned j = IsLE ? 0 : 4;
1936 for (unsigned i = 0; i != 8; i += 4)
1937 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1938 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1939 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1940 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1941 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1942 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1943 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1944 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1945 return false;
1946 }
1947 return true;
1948}
1949
1950/// isVMerge - Common function, used to match vmrg* shuffles.
1951///
1952static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1953 unsigned LHSStart, unsigned RHSStart) {
1954 if (N->getValueType(0) != MVT::v16i8)
1955 return false;
1956 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1957 "Unsupported merge size!");
1958
1959 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1960 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1961 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1962 LHSStart+j+i*UnitSize) ||
1963 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1964 RHSStart+j+i*UnitSize))
1965 return false;
1966 }
1967 return true;
1968}
1969
1970/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1971/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1972/// The ShuffleKind distinguishes between big-endian merges with two
1973/// different inputs (0), either-endian merges with two identical inputs (1),
1974/// and little-endian merges with two different inputs (2). For the latter,
1975/// the input operands are swapped (see PPCInstrAltivec.td).
1977 unsigned ShuffleKind, SelectionDAG &DAG) {
1978 if (DAG.getDataLayout().isLittleEndian()) {
1979 if (ShuffleKind == 1) // unary
1980 return isVMerge(N, UnitSize, 0, 0);
1981 else if (ShuffleKind == 2) // swapped
1982 return isVMerge(N, UnitSize, 0, 16);
1983 else
1984 return false;
1985 } else {
1986 if (ShuffleKind == 1) // unary
1987 return isVMerge(N, UnitSize, 8, 8);
1988 else if (ShuffleKind == 0) // normal
1989 return isVMerge(N, UnitSize, 8, 24);
1990 else
1991 return false;
1992 }
1993}
1994
1995/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1996/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1997/// The ShuffleKind distinguishes between big-endian merges with two
1998/// different inputs (0), either-endian merges with two identical inputs (1),
1999/// and little-endian merges with two different inputs (2). For the latter,
2000/// the input operands are swapped (see PPCInstrAltivec.td).
2002 unsigned ShuffleKind, SelectionDAG &DAG) {
2003 if (DAG.getDataLayout().isLittleEndian()) {
2004 if (ShuffleKind == 1) // unary
2005 return isVMerge(N, UnitSize, 8, 8);
2006 else if (ShuffleKind == 2) // swapped
2007 return isVMerge(N, UnitSize, 8, 24);
2008 else
2009 return false;
2010 } else {
2011 if (ShuffleKind == 1) // unary
2012 return isVMerge(N, UnitSize, 0, 0);
2013 else if (ShuffleKind == 0) // normal
2014 return isVMerge(N, UnitSize, 0, 16);
2015 else
2016 return false;
2017 }
2018}
2019
2020/**
2021 * Common function used to match vmrgew and vmrgow shuffles
2022 *
2023 * The indexOffset determines whether to look for even or odd words in
2024 * the shuffle mask. This is based on the of the endianness of the target
2025 * machine.
2026 * - Little Endian:
2027 * - Use offset of 0 to check for odd elements
2028 * - Use offset of 4 to check for even elements
2029 * - Big Endian:
2030 * - Use offset of 0 to check for even elements
2031 * - Use offset of 4 to check for odd elements
2032 * A detailed description of the vector element ordering for little endian and
2033 * big endian can be found at
2034 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2035 * Targeting your applications - what little endian and big endian IBM XL C/C++
2036 * compiler differences mean to you
2037 *
2038 * The mask to the shuffle vector instruction specifies the indices of the
2039 * elements from the two input vectors to place in the result. The elements are
2040 * numbered in array-access order, starting with the first vector. These vectors
2041 * are always of type v16i8, thus each vector will contain 16 elements of size
2042 * 8. More info on the shuffle vector can be found in the
2043 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2044 * Language Reference.
2045 *
2046 * The RHSStartValue indicates whether the same input vectors are used (unary)
2047 * or two different input vectors are used, based on the following:
2048 * - If the instruction uses the same vector for both inputs, the range of the
2049 * indices will be 0 to 15. In this case, the RHSStart value passed should
2050 * be 0.
2051 * - If the instruction has two different vectors then the range of the
2052 * indices will be 0 to 31. In this case, the RHSStart value passed should
2053 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2054 * to 31 specify elements in the second vector).
2055 *
2056 * \param[in] N The shuffle vector SD Node to analyze
2057 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2058 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2059 * vector to the shuffle_vector instruction
2060 * \return true iff this shuffle vector represents an even or odd word merge
2061 */
2062static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2063 unsigned RHSStartValue) {
2064 if (N->getValueType(0) != MVT::v16i8)
2065 return false;
2066
2067 for (unsigned i = 0; i < 2; ++i)
2068 for (unsigned j = 0; j < 4; ++j)
2069 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2070 i*RHSStartValue+j+IndexOffset) ||
2071 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2072 i*RHSStartValue+j+IndexOffset+8))
2073 return false;
2074 return true;
2075}
2076
2077/**
2078 * Determine if the specified shuffle mask is suitable for the vmrgew or
2079 * vmrgow instructions.
2080 *
2081 * \param[in] N The shuffle vector SD Node to analyze
2082 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2083 * \param[in] ShuffleKind Identify the type of merge:
2084 * - 0 = big-endian merge with two different inputs;
2085 * - 1 = either-endian merge with two identical inputs;
2086 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2087 * little-endian merges).
2088 * \param[in] DAG The current SelectionDAG
2089 * \return true iff this shuffle mask
2090 */
2092 unsigned ShuffleKind, SelectionDAG &DAG) {
2093 if (DAG.getDataLayout().isLittleEndian()) {
2094 unsigned indexOffset = CheckEven ? 4 : 0;
2095 if (ShuffleKind == 1) // Unary
2096 return isVMerge(N, indexOffset, 0);
2097 else if (ShuffleKind == 2) // swapped
2098 return isVMerge(N, indexOffset, 16);
2099 else
2100 return false;
2101 }
2102 else {
2103 unsigned indexOffset = CheckEven ? 0 : 4;
2104 if (ShuffleKind == 1) // Unary
2105 return isVMerge(N, indexOffset, 0);
2106 else if (ShuffleKind == 0) // Normal
2107 return isVMerge(N, indexOffset, 16);
2108 else
2109 return false;
2110 }
2111 return false;
2112}
2113
2114/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2115/// amount, otherwise return -1.
2116/// The ShuffleKind distinguishes between big-endian operations with two
2117/// different inputs (0), either-endian operations with two identical inputs
2118/// (1), and little-endian operations with two different inputs (2). For the
2119/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2120int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2121 SelectionDAG &DAG) {
2122 if (N->getValueType(0) != MVT::v16i8)
2123 return -1;
2124
2126
2127 // Find the first non-undef value in the shuffle mask.
2128 unsigned i;
2129 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2130 /*search*/;
2131
2132 if (i == 16) return -1; // all undef.
2133
2134 // Otherwise, check to see if the rest of the elements are consecutively
2135 // numbered from this value.
2136 unsigned ShiftAmt = SVOp->getMaskElt(i);
2137 if (ShiftAmt < i) return -1;
2138
2139 ShiftAmt -= i;
2140 bool isLE = DAG.getDataLayout().isLittleEndian();
2141
2142 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2143 // Check the rest of the elements to see if they are consecutive.
2144 for (++i; i != 16; ++i)
2145 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2146 return -1;
2147 } else if (ShuffleKind == 1) {
2148 // Check the rest of the elements to see if they are consecutive.
2149 for (++i; i != 16; ++i)
2150 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2151 return -1;
2152 } else
2153 return -1;
2154
2155 if (isLE)
2156 ShiftAmt = 16 - ShiftAmt;
2157
2158 return ShiftAmt;
2159}
2160
2161/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2162/// specifies a splat of a single element that is suitable for input to
2163/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2165 EVT VT = N->getValueType(0);
2166 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2167 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2168
2169 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2170 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2171
2172 // The consecutive indices need to specify an element, not part of two
2173 // different elements. So abandon ship early if this isn't the case.
2174 if (N->getMaskElt(0) % EltSize != 0)
2175 return false;
2176
2177 // This is a splat operation if each element of the permute is the same, and
2178 // if the value doesn't reference the second vector.
2179 unsigned ElementBase = N->getMaskElt(0);
2180
2181 // FIXME: Handle UNDEF elements too!
2182 if (ElementBase >= 16)
2183 return false;
2184
2185 // Check that the indices are consecutive, in the case of a multi-byte element
2186 // splatted with a v16i8 mask.
2187 for (unsigned i = 1; i != EltSize; ++i)
2188 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2189 return false;
2190
2191 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2192 if (N->getMaskElt(i) < 0) continue;
2193 for (unsigned j = 0; j != EltSize; ++j)
2194 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2195 return false;
2196 }
2197 return true;
2198}
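// Illustrative example (annotation): with EltSize == 4, the mask
// <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11> is accepted - ElementBase is
// 8, its four bytes are consecutive, and every later group repeats the first
// - so it describes a splat of word 2 of the first input.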
2199
2200/// Check that the mask is shuffling N byte elements. Within each N byte
2201/// element of the mask, the indices could be either in increasing or
2202/// decreasing order as long as they are consecutive.
2203/// \param[in] N the shuffle vector SD Node to analyze
2204/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2205/// Word/DoubleWord/QuadWord).
2206 /// \param[in] StepLen the step between consecutive indices within an N-byte
2207 /// element: 1 if the indices increase, -1 if they decrease.
2208/// \return true iff the mask is shuffling N byte elements.
2209static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2210 int StepLen) {
2211 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2212 "Unexpected element width.");
2213 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2214
2215 unsigned NumOfElem = 16 / Width;
2216 unsigned MaskVal[16]; // Width is never greater than 16
2217 for (unsigned i = 0; i < NumOfElem; ++i) {
2218 MaskVal[0] = N->getMaskElt(i * Width);
2219 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2220 return false;
2221 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2222 return false;
2223 }
2224
2225 for (unsigned int j = 1; j < Width; ++j) {
2226 MaskVal[j] = N->getMaskElt(i * Width + j);
2227 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2228 return false;
2229 }
2230 }
2231 }
2232
2233 return true;
2234}
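// Illustrative example (annotation): with Width == 4 and StepLen == 1, the
// mask <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11> passes (each word is
// aligned and its bytes increase), whereas StepLen == -1 instead accepts
// byte-reversed elements such as <3,2,1,0, 7,6,5,4, ...>.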
2235
2236bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2237 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2238 if (!isNByteElemShuffleMask(N, 4, 1))
2239 return false;
2240
2241 // Now we look at mask elements 0,4,8,12
2242 unsigned M0 = N->getMaskElt(0) / 4;
2243 unsigned M1 = N->getMaskElt(4) / 4;
2244 unsigned M2 = N->getMaskElt(8) / 4;
2245 unsigned M3 = N->getMaskElt(12) / 4;
2246 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2247 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2248
2249 // Below, let H and L be arbitrary elements of the shuffle mask
2250 // where H is in the range [4,7] and L is in the range [0,3].
2251 // H, 1, 2, 3 or L, 5, 6, 7
2252 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2253 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2254 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2255 InsertAtByte = IsLE ? 12 : 0;
2256 Swap = M0 < 4;
2257 return true;
2258 }
2259 // 0, H, 2, 3 or 4, L, 6, 7
2260 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2261 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2262 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2263 InsertAtByte = IsLE ? 8 : 4;
2264 Swap = M1 < 4;
2265 return true;
2266 }
2267 // 0, 1, H, 3 or 4, 5, L, 7
2268 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2269 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2270 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2271 InsertAtByte = IsLE ? 4 : 8;
2272 Swap = M2 < 4;
2273 return true;
2274 }
2275 // 0, 1, 2, H or 4, 5, 6, L
2276 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2277 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2278 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2279 InsertAtByte = IsLE ? 0 : 12;
2280 Swap = M3 < 4;
2281 return true;
2282 }
2283
2284 // If both vector operands for the shuffle are the same vector, the mask will
2285 // contain only elements from the first one and the second one will be undef.
2286 if (N->getOperand(1).isUndef()) {
2287 ShiftElts = 0;
2288 Swap = true;
2289 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2290 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2291 InsertAtByte = IsLE ? 12 : 0;
2292 return true;
2293 }
2294 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2295 InsertAtByte = IsLE ? 8 : 4;
2296 return true;
2297 }
2298 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2299 InsertAtByte = IsLE ? 4 : 8;
2300 return true;
2301 }
2302 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2303 InsertAtByte = IsLE ? 0 : 12;
2304 return true;
2305 }
2306 }
2307
2308 return false;
2309}
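// Illustrative example (annotation): the mask
// <0,1,2,3, 20,21,22,23, 8,9,10,11, 12,13,14,15> has word indices <0,5,2,3>,
// so only result word 1 comes from the second input; on little-endian it
// matches the "0, H, 2, 3" case above with InsertAtByte == 8, Swap == false.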
2310
2311bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2312 bool &Swap, bool IsLE) {
2313 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2314 // Ensure each byte index of the word is consecutive.
2315 if (!isNByteElemShuffleMask(N, 4, 1))
2316 return false;
2317
2318 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2319 unsigned M0 = N->getMaskElt(0) / 4;
2320 unsigned M1 = N->getMaskElt(4) / 4;
2321 unsigned M2 = N->getMaskElt(8) / 4;
2322 unsigned M3 = N->getMaskElt(12) / 4;
2323
2324 // If both vector operands for the shuffle are the same vector, the mask will
2325 // contain only elements from the first one and the second one will be undef.
2326 if (N->getOperand(1).isUndef()) {
2327 assert(M0 < 4 && "Indexing into an undef vector?");
2328 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2329 return false;
2330
2331 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2332 Swap = false;
2333 return true;
2334 }
2335
2336 // Ensure each word index of the ShuffleVector Mask is consecutive.
2337 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2338 return false;
2339
2340 if (IsLE) {
2341 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2342 // Input vectors don't need to be swapped if the leading element
2343 // of the result is one of the 3 left elements of the second vector
2344 // (or if there is no shift to be done at all).
2345 Swap = false;
2346 ShiftElts = (8 - M0) % 8;
2347 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2348 // Input vectors need to be swapped if the leading element
2349 // of the result is one of the 3 left elements of the first vector
2350 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2351 Swap = true;
2352 ShiftElts = (4 - M0) % 4;
2353 }
2354
2355 return true;
2356 } else { // BE
2357 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2358 // Input vectors don't need to be swapped if the leading element
2359 // of the result is one of the 4 elements of the first vector.
2360 Swap = false;
2361 ShiftElts = M0;
2362 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2363 // Input vectors need to be swapped if the leading element
2364 // of the result is one of the 4 elements of the right vector.
2365 Swap = true;
2366 ShiftElts = M0 - 4;
2367 }
2368
2369 return true;
2370 }
2371}
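// Illustrative example (annotation): on big-endian, the two-input mask
// <4,5,...,19> has word indices <1,2,3,4>; M0 == 1 selects from the first
// input, so Swap == false and ShiftElts == 1 (an xxsldwi by one word across
// the concatenated inputs).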
2372
2373static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2374 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2375
2376 if (!isNByteElemShuffleMask(N, Width, -1))
2377 return false;
2378
2379 for (int i = 0; i < 16; i += Width)
2380 if (N->getMaskElt(i) != i + Width - 1)
2381 return false;
2382
2383 return true;
2384}
2385
2386bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2387 return isXXBRShuffleMaskHelper(N, 2);
2388}
2389
2390bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2391 return isXXBRShuffleMaskHelper(N, 4);
2392}
2393
2394bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2395 return isXXBRShuffleMaskHelper(N, 8);
2396}
2397
2398bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2399 return isXXBRShuffleMaskHelper(N, 16);
2400}
2401
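// Illustrative example (annotation): the XXBRW (word) pattern accepts
// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>: each 4-byte element stays in
// place with its bytes in decreasing order (the StepLen == -1 case of
// isNByteElemShuffleMask above).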
2402/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2403/// if the inputs to the instruction should be swapped and set \p DM to the
2404/// value for the immediate.
2405/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2406/// AND element 0 of the result comes from the first input (LE) or second input
2407/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2408/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2409/// mask.
2410bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2411 bool &Swap, bool IsLE) {
2412 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2413
2414 // Ensure each byte index of the double word is consecutive.
2415 if (!isNByteElemShuffleMask(N, 8, 1))
2416 return false;
2417
2418 unsigned M0 = N->getMaskElt(0) / 8;
2419 unsigned M1 = N->getMaskElt(8) / 8;
2420 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2421
2422 // If both vector operands for the shuffle are the same vector, the mask will
2423 // contain only elements from the first one and the second one will be undef.
2424 if (N->getOperand(1).isUndef()) {
2425 if ((M0 | M1) < 2) {
2426 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2427 Swap = false;
2428 return true;
2429 } else
2430 return false;
2431 }
2432
2433 if (IsLE) {
2434 if (M0 > 1 && M1 < 2) {
2435 Swap = false;
2436 } else if (M0 < 2 && M1 > 1) {
2437 M0 = (M0 + 2) % 4;
2438 M1 = (M1 + 2) % 4;
2439 Swap = true;
2440 } else
2441 return false;
2442
2443 // Note: if control flow comes here that means Swap is already set above
2444 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2445 return true;
2446 } else { // BE
2447 if (M0 < 2 && M1 > 1) {
2448 Swap = false;
2449 } else if (M0 > 1 && M1 < 2) {
2450 M0 = (M0 + 2) % 4;
2451 M1 = (M1 + 2) % 4;
2452 Swap = true;
2453 } else
2454 return false;
2455
2456 // Note: if control flow comes here that means Swap is already set above
2457 DM = (M0 << 1) + (M1 & 1);
2458 return true;
2459 }
2460}
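// Illustrative example (annotation): on big-endian, a mask taking doubleword
// 0 of the first input and doubleword 1 of the second has M0 == 0 and
// M1 == 3, so Swap == false and DM == (0 << 1) + (3 & 1) == 1, i.e. an
// xxpermdi with an immediate of 1.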
2461
2462
2463/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2464/// appropriate for PPC mnemonics (which have a big endian bias - namely
2465/// elements are counted from the left of the vector register).
2466unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2467 SelectionDAG &DAG) {
2468 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2469 assert(isSplatShuffleMask(SVOp, EltSize));
2470 EVT VT = SVOp->getValueType(0);
2471
2472 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2473 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2474 : SVOp->getMaskElt(0);
2475
2476 if (DAG.getDataLayout().isLittleEndian())
2477 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2478 else
2479 return SVOp->getMaskElt(0) / EltSize;
2480}
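// Illustrative example (annotation): for EltSize == 4 with mask element 0
// equal to 8 (a splat of word 2), big-endian returns 8 / 4 == 2 while
// little-endian returns (16 / 4) - 1 - 2 == 1, matching the left-to-right
// element numbering used by the vspltw/xxspltw mnemonics.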
2481
2482/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2483/// by using a vspltis[bhw] instruction of the specified element size, return
2484/// the constant being splatted. The ByteSize field indicates the number of
2485/// bytes of each element [124] -> [bhw].
2486SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2487 SDValue OpVal;
2488
2489 // If ByteSize of the splat is bigger than the element size of the
2490 // build_vector, then we have a case where we are checking for a splat where
2491 // multiple elements of the buildvector are folded together into a single
2492 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2493 unsigned EltSize = 16/N->getNumOperands();
2494 if (EltSize < ByteSize) {
2495 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2496 SDValue UniquedVals[4];
2497 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2498
2499 // See if all of the elements in the buildvector agree across.
2500 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2501 if (N->getOperand(i).isUndef()) continue;
2502 // If the element isn't a constant, bail fully out.
2503 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2504
2505 if (!UniquedVals[i&(Multiple-1)].getNode())
2506 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2507 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2508 return SDValue(); // no match.
2509 }
2510
2511 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2512 // either constant or undef values that are identical for each chunk. See
2513 // if these chunks can form into a larger vspltis*.
2514
2515 // Check to see if all of the leading entries are either 0 or -1. If
2516 // neither, then this won't fit into the immediate field.
2517 bool LeadingZero = true;
2518 bool LeadingOnes = true;
2519 for (unsigned i = 0; i != Multiple-1; ++i) {
2520 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2521
2522 LeadingZero &= isNullConstant(UniquedVals[i]);
2523 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2524 }
2525 // Finally, check the least significant entry.
2526 if (LeadingZero) {
2527 if (!UniquedVals[Multiple-1].getNode())
2528 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2529 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2530 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2531 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2532 }
2533 if (LeadingOnes) {
2534 if (!UniquedVals[Multiple-1].getNode())
2535 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2536 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2537 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2538 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2539 }
2540
2541 return SDValue();
2542 }
2543
2544 // Check to see if this buildvec has a single non-undef value in its elements.
2545 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2546 if (N->getOperand(i).isUndef()) continue;
2547 if (!OpVal.getNode())
2548 OpVal = N->getOperand(i);
2549 else if (OpVal != N->getOperand(i))
2550 return SDValue();
2551 }
2552
2553 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2554
2555 unsigned ValSizeInBytes = EltSize;
2556 uint64_t Value = 0;
2557 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2558 Value = CN->getZExtValue();
2559 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2560 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2561 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2562 }
2563
2564 // If the splat value is larger than the element value, then we can never do
2565 // this splat. The only case that we could fit the replicated bits into our
2566 // immediate field for would be zero, and we prefer to use vxor for it.
2567 if (ValSizeInBytes < ByteSize) return SDValue();
2568
2569 // If the element value is larger than the splat value, check if it consists
2570 // of a repeated bit pattern of size ByteSize.
2571 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2572 return SDValue();
2573
2574 // Properly sign extend the value.
2575 int MaskVal = SignExtend32(Value, ByteSize * 8);
2576
2577 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2578 if (MaskVal == 0) return SDValue();
2579
2580 // Finally, if this value fits in a 5 bit sext field, return it
2581 if (SignExtend32<5>(MaskVal) == MaskVal)
2582 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2583 return SDValue();
2584}
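// Illustrative example (annotation): a v16i8 build_vector splatting the byte
// -5 returns a target constant of -5 for ByteSize == 1, since -5 fits the
// 5-bit signed immediate of vspltisb; an all-zero vector deliberately fails
// here so it can be matched by ISD::isBuildVectorAllZeros instead.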
2585
2586//===----------------------------------------------------------------------===//
2587// Addressing Mode Selection
2588//===----------------------------------------------------------------------===//
2589
2590/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2591/// or 64-bit immediate, and if the value can be accurately represented as a
2592/// sign extension from a 16-bit value. If so, this returns true and the
2593/// immediate.
2594bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2595 if (!isa<ConstantSDNode>(N))
2596 return false;
2597
2598 Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2599 if (N->getValueType(0) == MVT::i32)
2600 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2601 else
2602 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2603}
2604bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2605 return isIntS16Immediate(Op.getNode(), Imm);
2606}
2607
2608/// Used when computing address flags for selecting loads and stores.
2609/// If we have an OR, check if the LHS and RHS are provably disjoint.
2610/// An OR of two provably disjoint values is equivalent to an ADD.
2611/// Most PPC load/store instructions compute the effective address as a sum,
2612/// so doing this conversion is useful.
2613static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2614 if (N.getOpcode() != ISD::OR)
2615 return false;
2616 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2617 if (!LHSKnown.Zero.getBoolValue())
2618 return false;
2619 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2620 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2621}
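// Illustrative example (annotation): for N = (or (shl %x, 4), 7), the low
// four bits of the LHS are known zero and all bits above bit 2 of the RHS
// are known zero, so ~(LHSKnown.Zero | RHSKnown.Zero) == 0 and the OR can
// safely be treated as an ADD when forming addresses.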
2622
2623/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2624/// be represented as an indexed [r+r] operation.
2625bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2626 SDValue &Index,
2627 SelectionDAG &DAG) const {
2628 for (SDNode *U : N->uses()) {
2629 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2630 if (Memop->getMemoryVT() == MVT::f64) {
2631 Base = N.getOperand(0);
2632 Index = N.getOperand(1);
2633 return true;
2634 }
2635 }
2636 }
2637 return false;
2638}
2639
2640 /// isIntS34Immediate - This method tests whether the value of the given node can be
2641/// accurately represented as a sign extension from a 34-bit value. If so,
2642/// this returns true and the immediate.
2643bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2644 if (!isa<ConstantSDNode>(N))
2645 return false;
2646
2647 Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2648 return isInt<34>(Imm);
2649}
2650bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2651 return isIntS34Immediate(Op.getNode(), Imm);
2652}
2653
2654 /// SelectAddressRegReg - Given the specified address, check to see if it
2655/// can be represented as an indexed [r+r] operation. Returns false if it
2656/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2657/// non-zero and N can be represented by a base register plus a signed 16-bit
2658/// displacement, make a more precise judgement by checking (displacement % \p
2659/// EncodingAlignment).
2660bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
2661 SDValue &Index, SelectionDAG &DAG,
2662 MaybeAlign EncodingAlignment) const {
2663 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2664 // a [pc+imm].
2665 if (SelectAddressPCRel(N, Base))
2666 return false;
2667
2668 int16_t Imm = 0;
2669 if (N.getOpcode() == ISD::ADD) {
2670 // SPE load/store (f64) can only handle 8-bit offsets, so it cannot use a
2671 // 16-bit [r+i] form; check for an EVX [r+r] form first.
2672 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2673 return true;
2674 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2675 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2676 return false; // r+i
2677 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2678 return false; // r+i
2679
2680 Base = N.getOperand(0);
2681 Index = N.getOperand(1);
2682 return true;
2683 } else if (N.getOpcode() == ISD::OR) {
2684 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2685 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2686 return false; // r+i can fold it if we can.
2687
2688 // If this is an or of disjoint bitfields, we can codegen this as an add
2689 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2690 // disjoint.
2691 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2692
2693 if (LHSKnown.Zero.getBoolValue()) {
2694 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2695 // If all of the bits are known zero on the LHS or RHS, the add won't
2696 // carry.
2697 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2698 Base = N.getOperand(0);
2699 Index = N.getOperand(1);
2700 return true;
2701 }
2702 }
2703 }
2704
2705 return false;
2706}
2707
2708// If we happen to be doing an i64 load or store into a stack slot that has
2709// less than a 4-byte alignment, then the frame-index elimination may need to
2710// use an indexed load or store instruction (because the offset may not be a
2711// multiple of 4). The extra register needed to hold the offset comes from the
2712// register scavenger, and it is possible that the scavenger will need to use
2713// an emergency spill slot. As a result, we need to make sure that a spill slot
2714// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2715// stack slot.
2716static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2717 // FIXME: This does not handle the LWA case.
2718 if (VT != MVT::i64)
2719 return;
2720
2721 // NOTE: We'll exclude negative FIs here, which come from argument
2722 // lowering, because there are no known test cases triggering this problem
2723 // using packed structures (or similar). We can remove this exclusion if
2724 // we find such a test case. The reason why this is so test-case driven is
2725 // because this entire 'fixup' is only to prevent crashes (from the
2726 // register scavenger) on not-really-valid inputs. For example, if we have:
2727 // %a = alloca i1
2728 // %b = bitcast i1* %a to i64*
2729 // store i64* a, i64 b
2730 // then the store should really be marked as 'align 1', but is not. If it
2731 // were marked as 'align 1' then the indexed form would have been
2732 // instruction-selected initially, and the problem this 'fixup' is preventing
2733 // won't happen regardless.
2734 if (FrameIdx < 0)
2735 return;
2736
2737 MachineFunction &MF = DAG.getMachineFunction();
2738 MachineFrameInfo &MFI = MF.getFrameInfo();
2739
2740 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2741 return;
2742
2743 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2744 FuncInfo->setHasNonRISpills();
2745}
2746
2747/// Returns true if the address N can be represented by a base register plus
2748/// a signed 16-bit displacement [r+imm], and if it is not better
2749/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2750/// displacements that are multiples of that value.
2751bool PPCTargetLowering::SelectAddressRegImm(
2752 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2753 MaybeAlign EncodingAlignment) const {
2754 // FIXME dl should come from parent load or store, not from address
2755 SDLoc dl(N);
2756
2757 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2758 // a [pc+imm].
2759 if (SelectAddressPCRel(N, Base))
2760 return false;
2761
2762 // If this can be more profitably realized as r+r, fail.
2763 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2764 return false;
2765
2766 if (N.getOpcode() == ISD::ADD) {
2767 int16_t imm = 0;
2768 if (isIntS16Immediate(N.getOperand(1), imm) &&
2769 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2770 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2771 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2772 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2773 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2774 } else {
2775 Base = N.getOperand(0);
2776 }
2777 return true; // [r+i]
2778 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2779 // Match LOAD (ADD (X, Lo(G))).
2780 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2781 && "Cannot handle constant offsets yet!");
2782 Disp = N.getOperand(1).getOperand(0); // The global address.
2783 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2784 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2785 Disp.getOpcode() == ISD::TargetConstantPool ||
2786 Disp.getOpcode() == ISD::TargetJumpTable);
2787 Base = N.getOperand(0);
2788 return true; // [&g+r]
2789 }
2790 } else if (N.getOpcode() == ISD::OR) {
2791 int16_t imm = 0;
2792 if (isIntS16Immediate(N.getOperand(1), imm) &&
2793 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2794 // If this is an or of disjoint bitfields, we can codegen this as an add
2795 // (for better address arithmetic) if the LHS and RHS of the OR are
2796 // provably disjoint.
2797 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2798
2799 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2800 // If all of the bits are known zero on the LHS or RHS, the add won't
2801 // carry.
2802 if (FrameIndexSDNode *FI =
2803 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2804 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2805 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2806 } else {
2807 Base = N.getOperand(0);
2808 }
2809 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2810 return true;
2811 }
2812 }
2813 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2814 // Loading from a constant address.
2815
2816 // If this address fits entirely in a 16-bit sext immediate field, codegen
2817 // this as "d, 0"
2818 int16_t Imm;
2819 if (isIntS16Immediate(CN, Imm) &&
2820 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2821 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2822 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2823 CN->getValueType(0));
2824 return true;
2825 }
2826
2827 // Handle 32-bit sext immediates with LIS + addr mode.
2828 if ((CN->getValueType(0) == MVT::i32 ||
2829 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2830 (!EncodingAlignment ||
2831 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2832 int Addr = (int)CN->getZExtValue();
2833
2834 // Otherwise, break this down into an LIS + disp.
2835 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2836
2837 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2838 MVT::i32);
2839 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2840 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2841 return true;
2842 }
2843 }
2844
2845 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2846 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2847 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2848 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2849 } else
2850 Base = N;
2851 return true; // [r+0]
2852}
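// Illustrative example (annotation): for N = (add %r, 20) with
// EncodingAlignment == 4 (a DS-form access such as ld/std), 20 fits in a
// signed 16-bit field and is a multiple of 4, so this returns Disp = 20 and
// Base = %r; an offset like 22 would instead fall back to [r+r] selection.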
2853
2854/// Similar to the 16-bit case but for instructions that take a 34-bit
2855/// displacement field (prefixed loads/stores).
2856bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2857 SDValue &Base,
2858 SelectionDAG &DAG) const {
2859 // Only on 64-bit targets.
2860 if (N.getValueType() != MVT::i64)
2861 return false;
2862
2863 SDLoc dl(N);
2864 int64_t Imm = 0;
2865
2866 if (N.getOpcode() == ISD::ADD) {
2867 if (!isIntS34Immediate(N.getOperand(1), Imm))
2868 return false;
2869 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2870 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2871 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2872 else
2873 Base = N.getOperand(0);
2874 return true;
2875 }
2876
2877 if (N.getOpcode() == ISD::OR) {
2878 if (!isIntS34Immediate(N.getOperand(1), Imm))
2879 return false;
2880 // If this is an or of disjoint bitfields, we can codegen this as an add
2881 // (for better address arithmetic) if the LHS and RHS of the OR are
2882 // provably disjoint.
2883 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2884 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2885 return false;
2886 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2887 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2888 else
2889 Base = N.getOperand(0);
2890 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2891 return true;
2892 }
2893
2894 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2895 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2896 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2897 return true;
2898 }
2899
2900 return false;
2901}
2902
2903 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2904/// represented as an indexed [r+r] operation.
2905bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2906 SDValue &Index,
2907 SelectionDAG &DAG) const {
2908 // Check to see if we can easily represent this as an [r+r] address. This
2909 // will fail if it thinks that the address is more profitably represented as
2910 // reg+imm, e.g. where imm = 0.
2911 if (SelectAddressRegReg(N, Base, Index, DAG))
2912 return true;
2913
2914 // If the address is the result of an add, we will utilize the fact that the
2915 // address calculation includes an implicit add. However, we can reduce
2916 // register pressure if we do not materialize a constant just for use as the
2917 // index register. We only get rid of the add if it is not an add of a
2918 // value and a 16-bit signed constant and both have a single use.
2919 int16_t imm = 0;
2920 if (N.getOpcode() == ISD::ADD &&
2921 (!isIntS16Immediate(N.getOperand(1), imm) ||
2922 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2923 Base = N.getOperand(0);
2924 Index = N.getOperand(1);
2925 return true;
2926 }
2927
2928 // Otherwise, do it the hard way, using R0 as the base register.
2929 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2930 N.getValueType());
2931 Index = N;
2932 return true;
2933}
2934
2935template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2936 Ty *PCRelCand = dyn_cast<Ty>(N);
2937 return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2938}
2939
2940/// Returns true if this address is a PC Relative address.
2941/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2942/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2943bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2944 // This is a materialize PC Relative node. Always select this as PC Relative.
2945 Base = N;
2946 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2947 return true;
2948 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2949 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2950 isValidPCRelNode<JumpTableSDNode>(N) ||
2951 isValidPCRelNode<BlockAddressSDNode>(N))
2952 return true;
2953 return false;
2954}
2955
2956/// Returns true if we should use a direct load into vector instruction
2957/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2958static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2959
2960 // If there are any uses other than scalar-to-vector, then we should
2961 // keep it as a scalar load -> direct move pattern to prevent multiple
2962 // loads.
2963 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2964 if (!LD)
2965 return false;
2966
2967 EVT MemVT = LD->getMemoryVT();
2968 if (!MemVT.isSimple())
2969 return false;
2970 switch(MemVT.getSimpleVT().SimpleTy) {
2971 case MVT::i64:
2972 break;
2973 case MVT::i32:
2974 if (!ST.hasP8Vector())
2975 return false;
2976 break;
2977 case MVT::i16:
2978 case MVT::i8:
2979 if (!ST.hasP9Vector())
2980 return false;
2981 break;
2982 default:
2983 return false;
2984 }
2985
2986 SDValue LoadedVal(N, 0);
2987 if (!LoadedVal.hasOneUse())
2988 return false;
2989
2990 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2991 UI != UE; ++UI)
2992 if (UI.getUse().get().getResNo() == 0 &&
2993 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2994 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2995 return false;
2996
2997 return true;
2998}
2999
3000/// getPreIndexedAddressParts - returns true by value, base pointer and
3001/// offset pointer and addressing mode by reference if the node's address
3002/// can be legally represented as pre-indexed load / store address.
3003bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3004 SDValue &Offset,
3005 ISD::MemIndexedMode &AM,
3006 SelectionDAG &DAG) const {
3007 if (DisablePPCPreinc) return false;
3008
3009 bool isLoad = true;
3010 SDValue Ptr;
3011 EVT VT;
3012 Align Alignment;
3013 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3014 Ptr = LD->getBasePtr();
3015 VT = LD->getMemoryVT();
3016 Alignment = LD->getAlign();
3017 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3018 Ptr = ST->getBasePtr();
3019 VT = ST->getMemoryVT();
3020 Alignment = ST->getAlign();
3021 isLoad = false;
3022 } else
3023 return false;
3024
3025 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3026 // instructions because we can fold these into a more efficient instruction
3027 // instead, (such as LXSD).
3028 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3029 return false;
3030 }
3031
3032 // PowerPC doesn't have preinc load/store instructions for vectors
3033 if (VT.isVector())
3034 return false;
3035
3036 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3037 // Common code will reject creating a pre-inc form if the base pointer
3038 // is a frame index, or if N is a store and the base pointer is either
3039 // the same as or a predecessor of the value being stored. Check for
3040 // those situations here, and try with swapped Base/Offset instead.
3041 bool Swap = false;
3042
3043 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3044 Swap = true;
3045 else if (!isLoad) {
3046 SDValue Val = cast<StoreSDNode>(N)->getValue();
3047 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3048 Swap = true;
3049 }
3050
3051 if (Swap)
3052 std::swap(Base, Offset);
3053
3054 AM = ISD::PRE_INC;
3055 return true;
3056 }
3057
3058 // LDU/STU can only handle immediates that are a multiple of 4.
3059 if (VT != MVT::i64) {
3060 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3061 return false;
3062 } else {
3063 // LDU/STU need an address with at least 4-byte alignment.
3064 if (Alignment < Align(4))
3065 return false;
3066
3067 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3068 return false;
3069 }
3070
3071 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3072 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3073 // sext i32 to i64 when addr mode is r+i.
3074 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3075 LD->getExtensionType() == ISD::SEXTLOAD &&
3076 isa<ConstantSDNode>(Offset))
3077 return false;
3078 }
3079
3080 AM = ISD::PRE_INC;
3081 return true;
3082}
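// Illustrative example (annotation): a loop walking an i64 array via
// "p += 8; x = *p" can select a single load-with-update (ldu), since the
// offset 8 is a multiple of 4 and the updated base is reused; the alignment
// guard above keeps misaligned i64 slots out of the D-form update.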
3083
3084//===----------------------------------------------------------------------===//
3085// LowerOperation implementation
3086//===----------------------------------------------------------------------===//
3087
3088/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3089/// and LoOpFlags to the target MO flags.
3090static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3091 unsigned &HiOpFlags, unsigned &LoOpFlags,
3092 const GlobalValue *GV = nullptr) {
3093 HiOpFlags = PPCII::MO_HA;
3094 LoOpFlags = PPCII::MO_LO;
3095
3096 // Don't use the pic base if not in PIC relocation model.
3097 if (IsPIC) {
3100 }
3101}
3102
3104 SelectionDAG &DAG) {
3105 SDLoc DL(HiPart);
3106 EVT PtrVT = HiPart.getValueType();
3107 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3108
3109 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3110 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3111
3112 // With PIC, the first instruction is actually "GR+hi(&G)".
3113 if (isPIC)
3114 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3115 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3116
3117 // Generate non-pic code that has direct accesses to the constant pool.
3118 // The address of the global is just (hi(&g)+lo(&g)).
3119 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3120}
3121
3122static void setUsesTOCBasePtr(MachineFunction &MF) {
3123 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3124 FuncInfo->setUsesTOCBasePtr();
3125}
3126
3127static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3128 setUsesTOCBasePtr(DAG.getMachineFunction());
3129}
3130
3131SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3132 SDValue GA) const {
3133 const bool Is64Bit = Subtarget.isPPC64();
3134 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3135 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3136 : Subtarget.isAIXABI()
3137 ? DAG.getRegister(PPC::R2, VT)
3138 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3139 SDValue Ops[] = { GA, Reg };
3140 return DAG.getMemIntrinsicNode(
3141 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3142 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3143 MachineMemOperand::MOLoad);
3144}
3145
3146SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3147 SelectionDAG &DAG) const {
3148 EVT PtrVT = Op.getValueType();
3149 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3150 const Constant *C = CP->getConstVal();
3151
3152 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3153 // The actual address of the GlobalValue is stored in the TOC.
3154 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3155 if (Subtarget.isUsingPCRelativeCalls()) {
3156 SDLoc DL(CP);
3157 EVT Ty = getPointerTy(DAG.getDataLayout());
3158 SDValue ConstPool = DAG.getTargetConstantPool(
3159 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3160 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3161 }
3162 setUsesTOCBasePtr(DAG);
3163 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3164 return getTOCEntry(DAG, SDLoc(CP), GA);
3165 }
3166
3167 unsigned MOHiFlag, MOLoFlag;
3168 bool IsPIC = isPositionIndependent();
3169 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3170
3171 if (IsPIC && Subtarget.isSVR4ABI()) {
3172 SDValue GA =
3173 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3174 return getTOCEntry(DAG, SDLoc(CP), GA);
3175 }
3176
3177 SDValue CPIHi =
3178 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3179 SDValue CPILo =
3180 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3181 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3182}
3183
3184// For 64-bit PowerPC, prefer the more compact relative encodings.
3185// This trades 32 bits per jump table entry for one or two instructions
3186// on the jump site.
3187unsigned PPCTargetLowering::getJumpTableEncoding() const {
3188 if (isJumpTableRelative())
3189 return MachineJumpTableInfo::EK_LabelDifference32;
3190
3191 return TargetLowering::getJumpTableEncoding();
3192}
3193
3194bool PPCTargetLowering::isJumpTableRelative() const {
3195 if (UseAbsoluteJumpTables)
3196 return false;
3197 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3198 return true;
3199 return TargetLowering::isJumpTableRelative();
3200}
3201
3202SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3203 SelectionDAG &DAG) const {
3204 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3205 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3206
3207 switch (getTargetMachine().getCodeModel()) {
3208 case CodeModel::Small:
3209 case CodeModel::Medium:
3210 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3211 default:
3212 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3213 getPointerTy(DAG.getDataLayout()));
3214 }
3215}
3216
3217const MCExpr *
3218PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3219 unsigned JTI,
3220 MCContext &Ctx) const {
3221 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3223
3224 switch (getTargetMachine().getCodeModel()) {
3225 case CodeModel::Small:
3226 case CodeModel::Medium:
3227 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3228 default:
3229 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3230 }
3231}
3232
3233SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3234 EVT PtrVT = Op.getValueType();
3235 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3236
3237 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3238 if (Subtarget.isUsingPCRelativeCalls()) {
3239 SDLoc DL(JT);
3240 EVT Ty = getPointerTy(DAG.getDataLayout());
3241 SDValue GA =
3242 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3243 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3244 return MatAddr;
3245 }
3246
3247 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3248 // The actual address of the GlobalValue is stored in the TOC.
3249 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3250 setUsesTOCBasePtr(DAG);
3251 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3252 return getTOCEntry(DAG, SDLoc(JT), GA);
3253 }
3254
3255 unsigned MOHiFlag, MOLoFlag;
3256 bool IsPIC = isPositionIndependent();
3257 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3258
3259 if (IsPIC && Subtarget.isSVR4ABI()) {
3260 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3261 PPCII::MO_PIC_FLAG);
3262 return getTOCEntry(DAG, SDLoc(GA), GA);
3263 }
3264
3265 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3266 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3267 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3268}
3269
3270SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3271 SelectionDAG &DAG) const {
3272 EVT PtrVT = Op.getValueType();
3273 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3274 const BlockAddress *BA = BASDN->getBlockAddress();
3275
3276 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3277 if (Subtarget.isUsingPCRelativeCalls()) {
3278 SDLoc DL(BASDN);
3279 EVT Ty = getPointerTy(DAG.getDataLayout());
3280 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3281 PPCII::MO_PCREL_FLAG);
3282 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3283 return MatAddr;
3284 }
3285
3286 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3287 // The actual BlockAddress is stored in the TOC.
3288 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3289 setUsesTOCBasePtr(DAG);
3290 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3291 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3292 }
3293
3294 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3295 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3296 return getTOCEntry(
3297 DAG, SDLoc(BASDN),
3298 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3299
3300 unsigned MOHiFlag, MOLoFlag;
3301 bool IsPIC = isPositionIndependent();
3302 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3303 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3304 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3305 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3306}
3307
3308SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3309 SelectionDAG &DAG) const {
3310 if (Subtarget.isAIXABI())
3311 return LowerGlobalTLSAddressAIX(Op, DAG);
3312
3313 return LowerGlobalTLSAddressLinux(Op, DAG);
3314}
3315
3316SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3317 SelectionDAG &DAG) const {
3318 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3319
3320 if (DAG.getTarget().useEmulatedTLS())
3321 report_fatal_error("Emulated TLS is not yet supported on AIX");
3322
3323 SDLoc dl(GA);
3324 const GlobalValue *GV = GA->getGlobal();
3325 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3326 bool Is64Bit = Subtarget.isPPC64();
3327 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3328
3329 if (Model == TLSModel::LocalExec) {
3330 SDValue VariableOffsetTGA =
3331 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3332 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3333 SDValue TLSReg;
3334 if (Is64Bit)
3335 // For local-exec on AIX (64-bit), the sequence that is generated involves
3336 // a load of the variable offset (from the TOC), followed by an add of the
3337 // loaded variable offset to R13 (the thread pointer).
3338 // This code sequence looks like:
3339 // ld reg1,var[TC](2)
3340 // add reg2, reg1, r13 // r13 contains the thread pointer
3341 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3342 else
3343 // For local-exec on AIX (32-bit), the sequence that is generated involves
3344 // loading the variable offset from the TOC, generating a call to
3345 // .__get_tpointer to get the thread pointer (which will be in R3), and
3346 // adding the two together:
3347 // lwz reg1,var[TC](2)
3348 // bla .__get_tpointer
3349 // add reg2, reg1, r3
3350 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3351 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3352 }
3353
3354 // The Local-Exec and General-Dynamic TLS models are currently the only
3355 // supported access models. If Local-exec is not possible or specified, all
3356 // GlobalTLSAddress nodes are lowered using the general-dynamic model.
3357 // We need to generate two TOC entries, one for the variable offset, one for
3358 // the region handle. The global address for the TOC entry of the region
3359 // handle is created with the MO_TLSGDM_FLAG flag and the global address
3360 // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3361 SDValue VariableOffsetTGA =
3362 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3363 SDValue RegionHandleTGA =
3364 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3365 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3366 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3367 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3368 RegionHandle);
3369}
3370
3371SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3372 SelectionDAG &DAG) const {
3373 // FIXME: TLS addresses currently use medium model code sequences,
3374 // which is the most useful form. Eventually support for small and
3375 // large models could be added if users need it, at the cost of
3376 // additional complexity.
3377 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3378 if (DAG.getTarget().useEmulatedTLS())
3379 return LowerToTLSEmulatedModel(GA, DAG);
3380
3381 SDLoc dl(GA);
3382 const GlobalValue *GV = GA->getGlobal();
3383 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3384 bool is64bit = Subtarget.isPPC64();
3385 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3386 PICLevel::Level picLevel = M->getPICLevel();
3387
3388 const TargetMachine &TM = getTargetMachine();
3389 TLSModel::Model Model = TM.getTLSModel(GV);
3390
3391 if (Model == TLSModel::LocalExec) {
3392 if (Subtarget.isUsingPCRelativeCalls()) {
3393 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3394 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3395 PPCII::MO_TPREL_PCREL_FLAG);
3396 SDValue MatAddr =
3397 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3398 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3399 }
3400
3401 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3402 PPCII::MO_TPREL_HA);
3403 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3404 PPCII::MO_TPREL_LO);
3405 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3406 : DAG.getRegister(PPC::R2, MVT::i32);
3407
3408 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3409 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3410 }
3411
3412 if (Model == TLSModel::InitialExec) {
3413 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3414 SDValue TGA = DAG.getTargetGlobalAddress(
3415 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3416 SDValue TGATLS = DAG.getTargetGlobalAddress(
3417 GV, dl, PtrVT, 0,
3418 IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3419 SDValue TPOffset;
3420 if (IsPCRel) {
3421 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3422 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3423 MachinePointerInfo());
3424 } else {
3425 SDValue GOTPtr;
3426 if (is64bit) {
3427 setUsesTOCBasePtr(DAG);
3428 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3429 GOTPtr =
3430 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3431 } else {
3432 if (!TM.isPositionIndependent())
3433 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3434 else if (picLevel == PICLevel::SmallPIC)
3435 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3436 else
3437 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3438 }
3439 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3440 }
3441 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3442 }
3443
3444 if (Model == TLSModel::GeneralDynamic) {
3445 if (Subtarget.isUsingPCRelativeCalls()) {
3446 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3447 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3448 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3449 }
3450
3451 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3452 SDValue GOTPtr;
3453 if (is64bit) {
3454 setUsesTOCBasePtr(DAG);
3455 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3456 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3457 GOTReg, TGA);
3458 } else {
3459 if (picLevel == PICLevel::SmallPIC)
3460 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3461 else
3462 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3463 }
3464 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3465 GOTPtr, TGA, TGA);
3466 }
3467
3468 if (Model == TLSModel::LocalDynamic) {
3469 if (Subtarget.isUsingPCRelativeCalls()) {
3470 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3471 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3472 SDValue MatPCRel =
3473 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3474 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3475 }
3476
3477 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3478 SDValue GOTPtr;
3479 if (is64bit) {
3480 setUsesTOCBasePtr(DAG);
3481 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3482 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3483 GOTReg, TGA);
3484 } else {
3485 if (picLevel == PICLevel::SmallPIC)
3486 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3487 else
3488 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3489 }
3490 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3491 PtrVT, GOTPtr, TGA, TGA);
3492 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3493 PtrVT, TLSAddr, TGA);
3494 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3495 }
3496
3497 llvm_unreachable("Unknown TLS model!");
3498}
3499
3500SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3501 SelectionDAG &DAG) const {
3502 EVT PtrVT = Op.getValueType();
3503 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3504 SDLoc DL(GSDN);
3505 const GlobalValue *GV = GSDN->getGlobal();
3506
3507 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3508 // The actual address of the GlobalValue is stored in the TOC.
3509 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3510 if (Subtarget.isUsingPCRelativeCalls()) {
3511 EVT Ty = getPointerTy(DAG.getDataLayout());
3512 if (isAccessedAsGotIndirect(Op)) {
3513 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3514 PPCII::MO_GOT_PCREL_FLAG);
3515 SDValue MatPCRel =
3516 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3517 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3518 MachinePointerInfo());
3519 return Load;
3520 } else {
3521 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3522 PPCII::MO_PCREL_FLAG);
3523 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3524 }
3525 }
3526 setUsesTOCBasePtr(DAG);
3527 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3528 return getTOCEntry(DAG, DL, GA);
3529 }
3530
3531 unsigned MOHiFlag, MOLoFlag;
3532 bool IsPIC = isPositionIndependent();
3533 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3534
3535 if (IsPIC && Subtarget.isSVR4ABI()) {
3536 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3537 GSDN->getOffset(),
3538 PPCII::MO_PIC_FLAG);
3539 return getTOCEntry(DAG, DL, GA);
3540 }
3541
3542 SDValue GAHi =
3543 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3544 SDValue GALo =
3545 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3546
3547 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3548}
3549
3550SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3551 bool IsStrict = Op->isStrictFPOpcode();
3552 ISD::CondCode CC =
3553 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3554 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3555 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3556 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3557 EVT LHSVT = LHS.getValueType();
3558 SDLoc dl(Op);
3559
3560 // Soften the setcc with libcall if it is fp128.
3561 if (LHSVT == MVT::f128) {
3562 assert(!Subtarget.hasP9Vector() &&
3563 "SETCC for f128 is already legal under Power9!");
3564 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3565 Op->getOpcode() == ISD::STRICT_FSETCCS);
3566 if (RHS.getNode())
3567 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3568 DAG.getCondCode(CC));
3569 if (IsStrict)
3570 return DAG.getMergeValues({LHS, Chain}, dl);
3571 return LHS;
3572 }
3573
3574 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3575
3576 if (Op.getValueType() == MVT::v2i64) {
3577 // When the operands themselves are v2i64 values, we need to do something
3578 // special because VSX has no underlying comparison operations for these.
3579 if (LHS.getValueType() == MVT::v2i64) {
3580 // Equality can be handled by casting to the legal type for Altivec
3581 // comparisons, everything else needs to be expanded.
3582 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3583 return SDValue();
3584 SDValue SetCC32 = DAG.getSetCC(
3585 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3586 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3587 int ShuffV[] = {1, 0, 3, 2};
3588 SDValue Shuff =
3589 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3590 return DAG.getBitcast(MVT::v2i64,
3591 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3592 dl, MVT::v4i32, Shuff, SetCC32));
3593 }
3594
3595 // We handle most of these in the usual way.
3596 return Op;
3597 }
3598
3599 // If we're comparing for equality to zero, expose the fact that this is
3600 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3601 // fold the new nodes.
3602 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3603 return V;
3604
3605 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3606 // Leave comparisons against 0 and -1 alone for now, since they're usually
3607 // optimized. FIXME: revisit this when we can custom lower all setcc
3608 // optimizations.
3609 if (C->isAllOnes() || C->isZero())
3610 return SDValue();
3611 }
3612
3613 // If we have an integer seteq/setne, turn it into a compare against zero
3614 // by xor'ing the rhs with the lhs, which is faster than setting a
3615 // condition register, reading it back out, and masking the correct bit. The
3616 // normal approach here uses sub to do this instead of xor. Using xor exposes
3617 // the result to other bit-twiddling opportunities.
3618 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3619 EVT VT = Op.getValueType();
3620 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3621 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3622 }
3623 return SDValue();
3624}
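// Illustrative example (annotation): an i32 "seteq %a, %b" becomes
// (setcc (xor %a, %b), 0, seteq), which the DAG combiner can then fold into
// the ctlz/srl idiom, roughly:
//   xor r3, r3, r4
//   cntlzw r3, r3
//   srwi r3, r3, 5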
3625
3626SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3627 SDNode *Node = Op.getNode();
3628 EVT VT = Node->getValueType(0);
3629 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3630 SDValue InChain = Node->getOperand(0);
3631 SDValue VAListPtr = Node->getOperand(1);
3632 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3633 SDLoc dl(Node);
3634
3635 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3636
3637 // gpr_index
3638 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3639 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3640 InChain = GprIndex.getValue(1);
3641
3642 if (VT == MVT::i64) {
3643 // Check if GprIndex is even
3644 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3645 DAG.getConstant(1, dl, MVT::i32));
3646 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3647 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3648 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3649 DAG.getConstant(1, dl, MVT::i32));
3650 // Align GprIndex to be even if it isn't
3651 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3652 GprIndex);
3653 }
3654
3655 // fpr index is 1 byte after gpr
3656 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3657 DAG.getConstant(1, dl, MVT::i32));
3658
3659 // fpr
3660 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3661 FprPtr, MachinePointerInfo(SV), MVT::i8);
3662 InChain = FprIndex.getValue(1);
3663
3664 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3665 DAG.getConstant(8, dl, MVT::i32));
3666
3667 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3668 DAG.getConstant(4, dl, MVT::i32));
3669
3670 // areas
3671 SDValue OverflowArea =
3672 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3673 InChain = OverflowArea.getValue(1);
3674
3675 SDValue RegSaveArea =
3676 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3677 InChain = RegSaveArea.getValue(1);
3678
3679 // select overflow_area if index > 8
3680 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3681 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3682
3683 // adjustment constant gpr_index * 4/8
3684 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3685 VT.isInteger() ? GprIndex : FprIndex,
3686 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3687 MVT::i32));
3688
3689 // OurReg = RegSaveArea + RegConstant
3690 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3691 RegConstant);
3692
3693 // Floating types are 32 bytes into RegSaveArea
3694 if (VT.isFloatingPoint())
3695 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3696 DAG.getConstant(32, dl, MVT::i32));
3697
3698 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3699 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3700 VT.isInteger() ? GprIndex : FprIndex,
3701 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3702 MVT::i32));
3703
3704 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3705 VT.isInteger() ? VAListPtr : FprPtr,
3706 MachinePointerInfo(SV), MVT::i8);
3707
3708 // determine if we should load from reg_save_area or overflow_area
3709 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3710
3711 // increase overflow_area by 4/8 if gpr/fpr > 8
3712 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3713 DAG.getConstant(VT.isInteger() ? 4 : 8,
3714 dl, MVT::i32));
3715
3716 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3717 OverflowAreaPlusN);
3718
3719 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3720 MachinePointerInfo(), MVT::i32);
3721
3722 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3723}
3724
3725SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3726 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3727
3728 // We have to copy the entire va_list struct:
3729 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3730 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3731 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3732 false, true, false, MachinePointerInfo(),
3733 MachinePointerInfo());
3734}
3735
3736SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3737 SelectionDAG &DAG) const {
3738 if (Subtarget.isAIXABI())
3739 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3740
3741 return Op.getOperand(0);
3742}
3743
3744SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3745 MachineFunction &MF = DAG.getMachineFunction();
3746 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3747
3748 assert((Op.getOpcode() == ISD::INLINEASM ||
3749 Op.getOpcode() == ISD::INLINEASM_BR) &&
3750 "Expecting Inline ASM node.");
3751
3752 // If an LR store is already known to be required then there is no point in
3753 // checking this ASM as well.
3754 if (MFI.isLRStoreRequired())
3755 return Op;
3756
3757 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3758 // type MVT::Glue. We want to ignore this last operand if that is the case.
3759 unsigned NumOps = Op.getNumOperands();
3760 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3761 --NumOps;
3762
3763 // Check all operands that may contain the LR.
3764 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3765 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3766 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3767 ++i; // Skip the ID value.
3768
3769 switch (InlineAsm::getKind(Flags)) {
3770 default:
3771 llvm_unreachable("Bad flags!");
3772 case InlineAsm::Kind_RegUse:
3773 case InlineAsm::Kind_Imm:
3774 case InlineAsm::Kind_Mem:
3775 i += NumVals;
3776 break;
3777 case InlineAsm::Kind_Clobber:
3778 case InlineAsm::Kind_RegDef:
3779 case InlineAsm::Kind_RegDefEarlyClobber: {
3780 for (; NumVals; --NumVals, ++i) {
3781 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3782 if (Reg != PPC::LR && Reg != PPC::LR8)
3783 continue;
3784 MFI.setLRStoreRequired();
3785 return Op;
3786 }
3787 break;
3788 }
3789 }
3790 }
3791
3792 return Op;
3793}
3794
3795SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3796 SelectionDAG &DAG) const {
3797 if (Subtarget.isAIXABI())
3798 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3799
3800 SDValue Chain = Op.getOperand(0);
3801 SDValue Trmp = Op.getOperand(1); // trampoline
3802 SDValue FPtr = Op.getOperand(2); // nested function
3803 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3804 SDLoc dl(Op);
3805
3806 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3807 bool isPPC64 = (PtrVT == MVT::i64);
3808 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3809
3810 TargetLowering::ArgListTy Args;
3811 TargetLowering::ArgListEntry Entry;
3812
3813 Entry.Ty = IntPtrTy;
3814 Entry.Node = Trmp; Args.push_back(Entry);
3815
3816 // TrampSize == (isPPC64 ? 48 : 40);
3817 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3818 isPPC64 ? MVT::i64 : MVT::i32);
3819 Args.push_back(Entry);
3820
3821 Entry.Node = FPtr; Args.push_back(Entry);
3822 Entry.Node = Nest; Args.push_back(Entry);
3823
3824 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3825 TargetLowering::CallLoweringInfo CLI(DAG);
3826 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3827 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3828 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3829
3830 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3831 return CallResult.second;
3832}
3833
3834SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3835 MachineFunction &MF = DAG.getMachineFunction();
3836 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3837 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3838
3839 SDLoc dl(Op);
3840
3841 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3842 // vastart just stores the address of the VarArgsFrameIndex slot into the
3843 // memory location argument.
3844 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3845 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3846 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3847 MachinePointerInfo(SV));
3848 }
3849
3850 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3851 // We suppose the given va_list is already allocated.
3852 //
3853 // typedef struct {
3854 // char gpr; /* index into the array of 8 GPRs
3855 // * stored in the register save area
3856 // * gpr=0 corresponds to r3,
3857 // * gpr=1 to r4, etc.
3858 // */
3859 // char fpr; /* index into the array of 8 FPRs
3860 // * stored in the register save area
3861 // * fpr=0 corresponds to f1,
3862 // * fpr=1 to f2, etc.
3863 // */
3864 // char *overflow_arg_area;
3865 // /* location on stack that holds
3866 // * the next overflow argument
3867 // */
3868 // char *reg_save_area;
3869 // /* where r3:r10 and f1:f8 (if saved)
3870 // * are stored
3871 // */
3872 // } va_list[1];
3873
3874 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3875 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3876 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3877 PtrVT);
3878 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3879 PtrVT);
3880
3881 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3882 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3883
3884 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3885 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3886
3887 uint64_t FPROffset = 1;
3888 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3889
3890 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3891
3892 // Store first byte : number of int regs
3893 SDValue firstStore =
3894 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3895 MachinePointerInfo(SV), MVT::i8);
3896 uint64_t nextOffset = FPROffset;
3897 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3898 ConstFPROffset);
3899
3900 // Store second byte : number of float regs
3901 SDValue secondStore =
3902 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3903 MachinePointerInfo(SV, nextOffset), MVT::i8);
3904 nextOffset += StackOffset;
3905 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3906
3907 // Store second word : arguments given on stack
3908 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3909 MachinePointerInfo(SV, nextOffset));
3910 nextOffset += FrameOffset;
3911 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3912
3913 // Store third word : arguments given in registers
3914 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3915 MachinePointerInfo(SV, nextOffset));
3916}
3917
3918/// FPR - The set of FP registers that should be allocated for arguments
3919/// on Darwin and AIX.
3920static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3921 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3922 PPC::F11, PPC::F12, PPC::F13};
3923
3924/// CalculateStackSlotSize - Calculates the size reserved for this argument on
3925/// the stack.
3926static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3927 unsigned PtrByteSize) {
3928 unsigned ArgSize = ArgVT.getStoreSize();
3929 if (Flags.isByVal())
3930 ArgSize = Flags.getByValSize();
3931
3932 // Round up to multiples of the pointer size, except for array members,
3933 // which are always packed.
3934 if (!Flags.isInConsecutiveRegs())
3935 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3936
3937 return ArgSize;
3938}
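// Worked example (added, illustrative): with PtrByteSize = 8, an i32 scalar
// (store size 4) reserves a full 8-byte slot, while an f32 element of a
// float array marked isInConsecutiveRegs stays packed at 4 bytes.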
3939
3940/// CalculateStackSlotAlignment - Calculates the alignment of this argument
3941/// on the stack.
3943 ISD::ArgFlagsTy Flags,
3944 unsigned PtrByteSize) {
3945 Align Alignment(PtrByteSize);
3946
3947 // Altivec parameters are padded to a 16 byte boundary.
3948 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3949 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3950 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3951 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3952 Alignment = Align(16);
3953
3954 // ByVal parameters are aligned as requested.
3955 if (Flags.isByVal()) {
3956 auto BVAlign = Flags.getNonZeroByValAlign();
3957 if (BVAlign > PtrByteSize) {
3958 if (BVAlign.value() % PtrByteSize != 0)
3959 report_fatal_error(
3960 "ByVal alignment is not a multiple of the pointer size");
3961
3962 Alignment = BVAlign;
3963 }
3964 }
3965
3966 // Array members are always packed to their original alignment.
3967 if (Flags.isInConsecutiveRegs()) {
3968 // If the array member was split into multiple registers, the first
3969 // needs to be aligned to the size of the full type. (Except for
3970 // ppcf128, which is only aligned as its f64 components.)
3971 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3972 Alignment = Align(OrigVT.getStoreSize());
3973 else
3974 Alignment = Align(ArgVT.getStoreSize());
3975 }
3976
3977 return Alignment;
3978}
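// Example (added, illustrative): a v4i32 argument yields Align(16); a byval
// aggregate requesting 32-byte alignment keeps Align(32), since 32 is a
// multiple of an 8-byte pointer.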
3979
3980/// CalculateStackSlotUsed - Return whether this argument will use its
3981/// stack slot (instead of being passed in registers). ArgOffset,
3982/// AvailableFPRs, and AvailableVRs must hold the current argument
3983/// position, and will be updated to account for this argument.
3985 unsigned PtrByteSize, unsigned LinkageSize,
3986 unsigned ParamAreaSize, unsigned &ArgOffset,
3987 unsigned &AvailableFPRs,
3988 unsigned &AvailableVRs) {
3989 bool UseMemory = false;
3990
3991 // Respect alignment of argument on the stack.
3992 Align Alignment =
3993 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3994 ArgOffset = alignTo(ArgOffset, Alignment);
3995 // If there's no space left in the argument save area, we must
3996 // use memory (this check also catches zero-sized arguments).
3997 if (ArgOffset >= LinkageSize + ParamAreaSize)
3998 UseMemory = true;
3999
4000 // Allocate argument on the stack.
4001 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4002 if (Flags.isInConsecutiveRegsLast())
4003 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4004 // If we overran the argument save area, we must use memory
4005 // (this check catches arguments passed partially in memory)
4006 if (ArgOffset > LinkageSize + ParamAreaSize)
4007 UseMemory = true;
4008
4009 // However, if the argument is actually passed in an FPR or a VR,
4010 // we don't use memory after all.
4011 if (!Flags.isByVal()) {
4012 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4013 if (AvailableFPRs > 0) {
4014 --AvailableFPRs;
4015 return false;
4016 }
4017 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4018 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4019 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4020 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4021 if (AvailableVRs > 0) {
4022 --AvailableVRs;
4023 return false;
4024 }
4025 }
4026
4027 return UseMemory;
4028}
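// Example (added, illustrative): under ELFv2 (LinkageSize = 32) with an
// 8-doubleword parameter save area (ParamAreaSize = 64), a ninth i64
// argument lands at ArgOffset = 96 >= 32 + 64 and so must use memory, while
// an f64 in the same position still returns false as long as an FPR remains.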
4029
4030/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4031/// ensure minimum alignment required for target.
4033 unsigned NumBytes) {
4034 return alignTo(NumBytes, Lowering->getStackAlign());
4035}
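// E.g. (added, illustrative): with a 16-byte stack alignment, NumBytes = 100
// rounds up to 112.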
4036
4037SDValue PPCTargetLowering::LowerFormalArguments(
4038 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4039 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4040 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4041 if (Subtarget.isAIXABI())
4042 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4043 InVals);
4044 if (Subtarget.is64BitELFABI())
4045 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4046 InVals);
4047 assert(Subtarget.is32BitELFABI());
4048 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4049 InVals);
4050}
4051
4052SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4053 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4054 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4055 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4056
4057 // 32-bit SVR4 ABI Stack Frame Layout:
4058 // +-----------------------------------+
4059 // +--> | Back chain |
4060 // | +-----------------------------------+
4061 // | | Floating-point register save area |
4062 // | +-----------------------------------+
4063 // | | General register save area |
4064 // | +-----------------------------------+
4065 // | | CR save word |
4066 // | +-----------------------------------+
4067 // | | VRSAVE save word |
4068 // | +-----------------------------------+
4069 // | | Alignment padding |
4070 // | +-----------------------------------+
4071 // | | Vector register save area |
4072 // | +-----------------------------------+
4073 // | | Local variable space |
4074 // | +-----------------------------------+
4075 // | | Parameter list area |
4076 // | +-----------------------------------+
4077 // | | LR save word |
4078 // | +-----------------------------------+
4079 // SP--> +--- | Back chain |
4080 // +-----------------------------------+
4081 //
4082 // Specifications:
4083 // System V Application Binary Interface PowerPC Processor Supplement
4084 // AltiVec Technology Programming Interface Manual
4085
4087 MachineFrameInfo &MFI = MF.getFrameInfo();
4088 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4089
4091 // Potential tail calls could cause overwriting of argument stack slots.
4092 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4093 (CallConv == CallingConv::Fast));
4094 const Align PtrAlign(4);
4095
4096 // Assign locations to all of the incoming arguments.
4097 SmallVector<CCValAssign, 16> ArgLocs;
4098 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4099 *DAG.getContext());
4100
4101 // Reserve space for the linkage area on the stack.
4102 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4103 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4104 if (useSoftFloat())
4105 CCInfo.PreAnalyzeFormalArguments(Ins);
4106
4107 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4108 CCInfo.clearWasPPCF128();
4109
4110 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4111 CCValAssign &VA = ArgLocs[i];
4112
4113 // Arguments stored in registers.
4114 if (VA.isRegLoc()) {
4115 const TargetRegisterClass *RC;
4116 EVT ValVT = VA.getValVT();
4117
4118 switch (ValVT.getSimpleVT().SimpleTy) {
4119 default:
4120 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4121 case MVT::i1:
4122 case MVT::i32:
4123 RC = &PPC::GPRCRegClass;
4124 break;
4125 case MVT::f32:
4126 if (Subtarget.hasP8Vector())
4127 RC = &PPC::VSSRCRegClass;
4128 else if (Subtarget.hasSPE())
4129 RC = &PPC::GPRCRegClass;
4130 else
4131 RC = &PPC::F4RCRegClass;
4132 break;
4133 case MVT::f64:
4134 if (Subtarget.hasVSX())
4135 RC = &PPC::VSFRCRegClass;
4136 else if (Subtarget.hasSPE())
4137 // SPE passes doubles in GPR pairs.
4138 RC = &PPC::GPRCRegClass;
4139 else
4140 RC = &PPC::F8RCRegClass;
4141 break;
4142 case MVT::v16i8:
4143 case MVT::v8i16:
4144 case MVT::v4i32:
4145 RC = &PPC::VRRCRegClass;
4146 break;
4147 case MVT::v4f32:
4148 RC = &PPC::VRRCRegClass;
4149 break;
4150 case MVT::v2f64:
4151 case MVT::v2i64:
4152 RC = &PPC::VRRCRegClass;
4153 break;
4154 }
4155
4156 SDValue ArgValue;
4157 // Transform the arguments stored in physical registers into
4158 // virtual ones.
4159 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4160 assert(i + 1 < e && "No second half of double precision argument");
4161 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4162 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4163 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4164 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4165 if (!Subtarget.isLittleEndian())
4166 std::swap (ArgValueLo, ArgValueHi);
4167 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4168 ArgValueHi);
4169 } else {
4170 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4171 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4172 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4173 if (ValVT == MVT::i1)
4174 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4175 }
4176
4177 InVals.push_back(ArgValue);
4178 } else {
4179 // Argument stored in memory.
4180 assert(VA.isMemLoc());
4181
4182 // Get the extended size of the argument type in stack
4183 unsigned ArgSize = VA.getLocVT().getStoreSize();
4184 // Get the actual size of the argument type
4185 unsigned ObjSize = VA.getValVT().getStoreSize();
4186 unsigned ArgOffset = VA.getLocMemOffset();
4187 // Stack objects in PPC32 are right justified.
4188 ArgOffset += ArgSize - ObjSize;
4189 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4190
4191 // Create load nodes to retrieve arguments from the stack.
4192 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4193 InVals.push_back(
4194 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4195 }
4196 }
4197
4198 // Assign locations to all of the incoming aggregate by value arguments.
4199 // Aggregates passed by value are stored in the local variable space of the
4200 // caller's stack frame, right above the parameter list area.
4201 SmallVector<CCValAssign, 16> ByValArgLocs;
4202 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4203 ByValArgLocs, *DAG.getContext());
4204
4205 // Reserve stack space for the allocations in CCInfo.
4206 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4207
4208 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4209
4210 // Area that is at least reserved in the caller of this function.
4211 unsigned MinReservedArea = CCByValInfo.getStackSize();
4212 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4213
4214 // Set the size that is at least reserved in caller of this function. Tail
4215 // call optimized function's reserved stack space needs to be aligned so that
4216 // taking the difference between two stack areas will result in an aligned
4217 // stack.
4218 MinReservedArea =
4219 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4220 FuncInfo->setMinReservedArea(MinReservedArea);
4221
4222 SmallVector<SDValue, 8> MemOps;
4223
4224 // If the function takes variable number of arguments, make a frame index for
4225 // the start of the first vararg value... for expansion of llvm.va_start.
4226 if (isVarArg) {
4227 static const MCPhysReg GPArgRegs[] = {
4228 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4229 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4230 };
4231 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4232
4233 static const MCPhysReg FPArgRegs[] = {
4234 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4235 PPC::F8
4236 };
4237 unsigned NumFPArgRegs = std::size(FPArgRegs);
4238
4239 if (useSoftFloat() || hasSPE())
4240 NumFPArgRegs = 0;
4241
4242 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4243 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4244
4245 // Make room for NumGPArgRegs and NumFPArgRegs.
4246 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4247 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4248
4249 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4250 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4251
4252 FuncInfo->setVarArgsFrameIndex(
4253 MFI.CreateStackObject(Depth, Align(8), false));
4254 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4255
4256 // The fixed integer arguments of a variadic function are stored to the
4257 // VarArgsFrameIndex on the stack so that they may be loaded by
4258 // dereferencing the result of va_next.
4259 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4260 // Get an existing live-in vreg, or add a new one.
4261 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4262 if (!VReg)
4263 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4264
4265 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4266 SDValue Store =
4267 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4268 MemOps.push_back(Store);
4269 // Increment the address by four for the next argument to store
4270 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4271 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4272 }
4273
4274 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4275 // is set.
4276 // The double arguments are stored to the VarArgsFrameIndex
4277 // on the stack.
4278 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4279 // Get an existing live-in vreg, or add a new one.
4280 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4281 if (!VReg)
4282 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4283
4284 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4285 SDValue Store =
4286 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4287 MemOps.push_back(Store);
4288 // Increment the address by eight for the next argument to store
4289 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4290 PtrVT);
4291 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4292 }
4293 }
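// Size check (added, illustrative): the register save area created above is
// Depth = 8 GPRs * 4 bytes + 8 FPRs * 8 bytes = 96 bytes, or 32 bytes when
// soft-float/SPE forces NumFPArgRegs to 0.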
4294
4295 if (!MemOps.empty())
4296 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4297
4298 return Chain;
4299}
4300
4301// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4302// value to MVT::i64 and then truncate to the correct register size.
4303 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4304 EVT ObjectVT, SelectionDAG &DAG,
4305 SDValue ArgVal,
4306 const SDLoc &dl) const {
4307 if (Flags.isSExt())
4308 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4309 DAG.getValueType(ObjectVT));
4310 else if (Flags.isZExt())
4311 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4312 DAG.getValueType(ObjectVT));
4313
4314 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4315}
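// Sketch of the resulting DAG for a sign-extended i32 argument (added,
// illustrative):
//   t1: i64 = CopyFromReg ...
//   t2: i64 = AssertSext t1, ValueType:i32
//   t3: i32 = truncate t2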
4316
4317SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4318 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4319 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4320 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4321 // TODO: add description of PPC stack frame format, or at least some docs.
4322 //
4323 bool isELFv2ABI = Subtarget.isELFv2ABI();
4324 bool isLittleEndian = Subtarget.isLittleEndian();
4325 MachineFunction &MF = DAG.getMachineFunction();
4326 MachineFrameInfo &MFI = MF.getFrameInfo();
4327 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4328
4329 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4330 "fastcc not supported on varargs functions");
4331
4332 EVT PtrVT = getPointerTy(MF.getDataLayout());
4333 // Potential tail calls could cause overwriting of argument stack slots.
4334 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4335 (CallConv == CallingConv::Fast));
4336 unsigned PtrByteSize = 8;
4337 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4338
4339 static const MCPhysReg GPR[] = {
4340 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4341 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4342 };
4343 static const MCPhysReg VR[] = {
4344 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4345 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4346 };
4347
4348 const unsigned Num_GPR_Regs = std::size(GPR);
4349 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4350 const unsigned Num_VR_Regs = std::size(VR);
4351
4352 // Do a first pass over the arguments to determine whether the ABI
4353 // guarantees that our caller has allocated the parameter save area
4354 // on its stack frame. In the ELFv1 ABI, this is always the case;
4355 // in the ELFv2 ABI, it is true if this is a vararg function or if
4356 // any parameter is located in a stack slot.
4357
4358 bool HasParameterArea = !isELFv2ABI || isVarArg;
4359 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4360 unsigned NumBytes = LinkageSize;
4361 unsigned AvailableFPRs = Num_FPR_Regs;
4362 unsigned AvailableVRs = Num_VR_Regs;
4363 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4364 if (Ins[i].Flags.isNest())
4365 continue;
4366
4367 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4368 PtrByteSize, LinkageSize, ParamAreaSize,
4369 NumBytes, AvailableFPRs, AvailableVRs))
4370 HasParameterArea = true;
4371 }
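// Example (added, illustrative): an ELFv2 function taking two doubles passes
// both in FPRs, so HasParameterArea stays false and the caller may omit the
// 64-byte parameter save area; any argument that spills past the registers
// flips it back to true.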
4372
4373 // Add DAG nodes to load the arguments or copy them out of registers. On
4374 // entry to a function on PPC, the arguments start after the linkage area,
4375 // although the first ones are often in registers.
4376
4377 unsigned ArgOffset = LinkageSize;
4378 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4379 SmallVector<SDValue, 8> MemOps;
4380 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4381 unsigned CurArgIdx = 0;
4382 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4383 SDValue ArgVal;
4384 bool needsLoad = false;
4385 EVT ObjectVT = Ins[ArgNo].VT;
4386 EVT OrigVT = Ins[ArgNo].ArgVT;
4387 unsigned ObjSize = ObjectVT.getStoreSize();
4388 unsigned ArgSize = ObjSize;
4389 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4390 if (Ins[ArgNo].isOrigArg()) {
4391 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4392 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4393 }
4394 // We re-align the argument offset for each argument, except under the
4395 // fast calling convention, where we only do so once we know the argument
4396 // will actually use a stack slot.
4397 unsigned CurArgOffset;
4398 Align Alignment;
4399 auto ComputeArgOffset = [&]() {
4400 /* Respect alignment of argument on the stack. */
4401 Alignment =
4402 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4403 ArgOffset = alignTo(ArgOffset, Alignment);
4404 CurArgOffset = ArgOffset;
4405 };
4406
4407 if (CallConv != CallingConv::Fast) {
4408 ComputeArgOffset();
4409
4410 /* Compute GPR index associated with argument offset. */
4411 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4412 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4413 }
4414
4415 // FIXME the codegen can be much improved in some cases.
4416 // We do not have to keep everything in memory.
4417 if (Flags.isByVal()) {
4418 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4419
4420 if (CallConv == CallingConv::Fast)
4421 ComputeArgOffset();
4422
4423 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4424 ObjSize = Flags.getByValSize();
4425 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4426 // Empty aggregate parameters do not take up registers. Examples:
4427 // struct { } a;
4428 // union { } b;
4429 // int c[0];
4430 // etc. However, we have to provide a place-holder in InVals, so
4431 // pretend we have an 8-byte item at the current address for that
4432 // purpose.
4433 if (!ObjSize) {
4434 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4435 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4436 InVals.push_back(FIN);
4437 continue;
4438 }
4439
4440 // Create a stack object covering all stack doublewords occupied
4441 // by the argument. If the argument is (fully or partially) on
4442 // the stack, or if the argument is fully in registers but the
4443 // caller has allocated the parameter save area anyway, we can refer
4444 // directly to the caller's stack frame. Otherwise, create a
4445 // local copy in our own frame.
4446 int FI;
4447 if (HasParameterArea ||
4448 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4449 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4450 else
4451 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4452 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4453
4454 // Handle aggregates smaller than 8 bytes.
4455 if (ObjSize < PtrByteSize) {
4456 // The value of the object is its address, which differs from the
4457 // address of the enclosing doubleword on big-endian systems.
4458 SDValue Arg = FIN;
4459 if (!isLittleEndian) {
4460 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4461 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4462 }
4463 InVals.push_back(Arg);
4464
4465 if (GPR_idx != Num_GPR_Regs) {
4466 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4467 FuncInfo->addLiveInAttr(VReg, Flags);
4468 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4469 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4470 SDValue Store =
4471 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4472 MachinePointerInfo(&*FuncArg), ObjType);
4473 MemOps.push_back(Store);
4474 }
4475 // Whether we copied from a register or not, advance the offset
4476 // into the parameter save area by a full doubleword.
4477 ArgOffset += PtrByteSize;
4478 continue;
4479 }
4480
4481 // The value of the object is its address, which is the address of
4482 // its first stack doubleword.
4483 InVals.push_back(FIN);
4484
4485 // Store whatever pieces of the object are in registers to memory.
4486 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4487 if (GPR_idx == Num_GPR_Regs)
4488 break;
4489
4490 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4491 FuncInfo->addLiveInAttr(VReg, Flags);
4492 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4493 SDValue Addr = FIN;
4494 if (j) {
4495 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4496 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4497 }
4498 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4499 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4500 SDValue Store =
4501 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4502 MachinePointerInfo(&*FuncArg, j), ObjType);
4503 MemOps.push_back(Store);
4504 ++GPR_idx;
4505 }
4506 ArgOffset += ArgSize;
4507 continue;
4508 }
4509
4510 switch (ObjectVT.getSimpleVT().SimpleTy) {
4511 default: llvm_unreachable("Unhandled argument type!");
4512 case MVT::i1:
4513 case MVT::i32:
4514 case MVT::i64:
4515 if (Flags.isNest()) {
4516 // The 'nest' parameter, if any, is passed in R11.
4517 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4518 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4519
4520 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4521 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4522
4523 break;
4524 }
4525
4526 // These can be scalar arguments or elements of an integer array type
4527 // passed directly. Clang may use those instead of "byval" aggregate
4528 // types to avoid forcing arguments to memory unnecessarily.
4529 if (GPR_idx != Num_GPR_Regs) {
4530 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4531 FuncInfo->addLiveInAttr(VReg, Flags);
4532 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4533
4534 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4535 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4536 // value to MVT::i64 and then truncate to the correct register size.
4537 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4538 } else {
4539 if (CallConv == CallingConv::Fast)
4540 ComputeArgOffset();
4541
4542 needsLoad = true;
4543 ArgSize = PtrByteSize;
4544 }
4545 if (CallConv != CallingConv::Fast || needsLoad)
4546 ArgOffset += 8;
4547 break;
4548
4549 case MVT::f32:
4550 case MVT::f64:
4551 // These can be scalar arguments or elements of a float array type
4552 // passed directly. The latter are used to implement ELFv2 homogeneous
4553 // float aggregates.
4554 if (FPR_idx != Num_FPR_Regs) {
4555 unsigned VReg;
4556
4557 if (ObjectVT == MVT::f32)
4558 VReg = MF.addLiveIn(FPR[FPR_idx],
4559 Subtarget.hasP8Vector()
4560 ? &PPC::VSSRCRegClass
4561 : &PPC::F4RCRegClass);
4562 else
4563 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4564 ? &PPC::VSFRCRegClass
4565 : &PPC::F8RCRegClass);
4566
4567 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4568 ++FPR_idx;
4569 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4570 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4571 // once we support fp <-> gpr moves.
4572
4573 // This can only ever happen in the presence of f32 array types,
4574 // since otherwise we never run out of FPRs before running out
4575 // of GPRs.
4576 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4577 FuncInfo->addLiveInAttr(VReg, Flags);
4578 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4579
4580 if (ObjectVT == MVT::f32) {
4581 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4582 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4583 DAG.getConstant(32, dl, MVT::i32));
4584 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4585 }
4586
4587 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4588 } else {
4589 if (CallConv == CallingConv::Fast)
4591
4592 needsLoad = true;
4593 }
4594
4595 // When passing an array of floats, the array occupies consecutive
4596 // space in the argument area; only round up to the next doubleword
4597 // at the end of the array. Otherwise, each float takes 8 bytes.
4598 if (CallConv != CallingConv::Fast || needsLoad) {
4599 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4600 ArgOffset += ArgSize;
4601 if (Flags.isInConsecutiveRegsLast())
4602 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4603 }
4604 break;
4605 case MVT::v4f32:
4606 case MVT::v4i32:
4607 case MVT::v8i16:
4608 case MVT::v16i8:
4609 case MVT::v2f64:
4610 case MVT::v2i64:
4611 case MVT::v1i128:
4612 case MVT::f128:
4613 // These can be scalar arguments or elements of a vector array type
4614 // passed directly. The latter are used to implement ELFv2 homogeneous
4615 // vector aggregates.
4616 if (VR_idx != Num_VR_Regs) {
4617 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4618 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4619 ++VR_idx;
4620 } else {
4621 if (CallConv == CallingConv::Fast)
4622 ComputeArgOffset();
4623 needsLoad = true;
4624 }
4625 if (CallConv != CallingConv::Fast || needsLoad)
4626 ArgOffset += 16;
4627 break;
4628 }
4629
4630 // We need to load the argument to a virtual register if we determined
4631 // above that we ran out of physical registers of the appropriate type.
4632 if (needsLoad) {
4633 if (ObjSize < ArgSize && !isLittleEndian)
4634 CurArgOffset += ArgSize - ObjSize;
4635 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4636 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4637 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4638 }
4639
4640 InVals.push_back(ArgVal);
4641 }
4642
4643 // Area that is at least reserved in the caller of this function.
4644 unsigned MinReservedArea;
4645 if (HasParameterArea)
4646 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4647 else
4648 MinReservedArea = LinkageSize;
4649
4650 // Set the size that is at least reserved in caller of this function. Tail
4651 // call optimized functions' reserved stack space needs to be aligned so that
4652 // taking the difference between two stack areas will result in an aligned
4653 // stack.
4654 MinReservedArea =
4655 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4656 FuncInfo->setMinReservedArea(MinReservedArea);
4657
4658 // If the function takes variable number of arguments, make a frame index for
4659 // the start of the first vararg value... for expansion of llvm.va_start.
4660 // The ELFv2 ABI spec states:
4661 // C programs that are intended to be *portable* across different compilers
4662 // and architectures must use the header file <stdarg.h> to deal with variable
4663 // argument lists.
4664 if (isVarArg && MFI.hasVAStart()) {
4665 int Depth = ArgOffset;
4666
4667 FuncInfo->setVarArgsFrameIndex(
4668 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4669 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4670
4671 // If this function is vararg, store any remaining integer argument regs
4672 // to their spots on the stack so that they may be loaded by dereferencing
4673 // the result of va_next.
4674 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4675 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4676 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4677 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4678 SDValue Store =
4679 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4680 MemOps.push_back(Store);
4681 // Increment the address by PtrByteSize for the next argument to store
4682 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4683 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4684 }
4685 }
4686
4687 if (!MemOps.empty())
4688 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4689
4690 return Chain;
4691}
4692
4693/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4694/// adjusted to accommodate the arguments for the tailcall.
4695static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4696 unsigned ParamSize) {
4697
4698 if (!isTailCall) return 0;
4699
4700 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4701 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4702 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4703 // Remember only if the new adjustment is bigger.
4704 if (SPDiff < FI->getTailCallSPDelta())
4705 FI->setTailCallSPDelta(SPDiff);
4706
4707 return SPDiff;
4708}
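// Example (added, illustrative): if the caller reserved 112 bytes but the
// tail callee needs 144, SPDiff = 112 - 144 = -32, i.e. the stack must grow
// by 32 bytes before branching to the callee.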
4709
4710static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4711
4712static bool callsShareTOCBase(const Function *Caller,
4713 const GlobalValue *CalleeGV,
4714 const TargetMachine &TM) {
4715 // It does not make sense to call callsShareTOCBase() with a caller that
4716 // is PC Relative since PC Relative callers do not have a TOC.
4717#ifndef NDEBUG
4718 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4719 assert(!STICaller->isUsingPCRelativeCalls() &&
4720 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4721#endif
4722
4723 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4724 // don't have enough information to determine if the caller and callee share
4725 // the same TOC base, so we have to pessimistically assume they don't for
4726 // correctness.
4727 if (!CalleeGV)
4728 return false;
4729
4730 // If the callee is preemptable, then the static linker will use a plt-stub
4731 // which saves the toc to the stack, and needs a nop after the call
4732 // instruction to convert to a toc-restore.
4733 if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), CalleeGV))
4734 return false;
4735
4736 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4737 // We may need a TOC restore in the situation where the caller requires a
4738 // valid TOC but the callee is PC Relative and does not.
4739 const Function *F = dyn_cast<Function>(CalleeGV);
4740 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4741
4742 // If we have an Alias we can try to get the function from there.
4743 if (Alias) {
4744 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4745 F = dyn_cast<Function>(GlobalObj);
4746 }
4747
4748 // If we still have no valid function pointer we do not have enough
4749 // information to determine if the callee uses PC Relative calls so we must
4750 // assume that it does.
4751 if (!F)
4752 return false;
4753
4754 // If the callee uses PC Relative we cannot guarantee that the callee won't
4755 // clobber the TOC of the caller and so we must assume that the two
4756 // functions do not share a TOC base.
4757 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4758 if (STICallee->isUsingPCRelativeCalls())
4759 return false;
4760
4761 // If the GV is not a strong definition then we need to assume it can be
4762 // replaced by another function at link time. The function that replaces
4763 // it may not share the same TOC as the caller since the callee may be
4764 // replaced by a PC Relative version of the same function.
4765 if (!CalleeGV->isStrongDefinitionForLinker())
4766 return false;
4767
4768 // The medium and large code models are expected to provide a sufficiently
4769 // large TOC to provide all data addressing needs of a module with a
4770 // single TOC.
4771 if (CodeModel::Medium == TM.getCodeModel() ||
4772 CodeModel::Large == TM.getCodeModel())
4773 return true;
4774
4775 // Any explicitly-specified sections and section prefixes must also match.
4776 // Also, if we're using -ffunction-sections, then each function is always in
4777 // a different section (the same is true for COMDAT functions).
4778 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4779 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4780 return false;
4781 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4782 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4783 return false;
4784 }
4785
4786 return true;
4787}
4788
4789 static bool
4790 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4791 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4792 assert(Subtarget.is64BitELFABI());
4793
4794 const unsigned PtrByteSize = 8;
4795 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4796
4797 static const MCPhysReg GPR[] = {
4798 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4799 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4800 };
4801 static const MCPhysReg VR[] = {
4802 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4803 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4804 };
4805
4806 const unsigned NumGPRs = std::size(GPR);
4807 const unsigned NumFPRs = 13;
4808 const unsigned NumVRs = std::size(VR);
4809 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4810
4811 unsigned NumBytes = LinkageSize;
4812 unsigned AvailableFPRs = NumFPRs;
4813 unsigned AvailableVRs = NumVRs;
4814
4815 for (const ISD::OutputArg& Param : Outs) {
4816 if (Param.Flags.isNest()) continue;
4817
4818 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4819 LinkageSize, ParamAreaSize, NumBytes,
4820 AvailableFPRs, AvailableVRs))
4821 return true;
4822 }
4823 return false;
4824}
4825
4826static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4827 if (CB.arg_size() != CallerFn->arg_size())
4828 return false;
4829
4830 auto CalleeArgIter = CB.arg_begin();
4831 auto CalleeArgEnd = CB.arg_end();
4832 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4833
4834 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4835 const Value* CalleeArg = *CalleeArgIter;
4836 const Value* CallerArg = &(*CallerArgIter);
4837 if (CalleeArg == CallerArg)
4838 continue;
4839
4840 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4841 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4842 // }
4843 // 1st argument of callee is undef and has the same type as caller.
4844 if (CalleeArg->getType() == CallerArg->getType() &&
4845 isa<UndefValue>(CalleeArg))
4846 continue;
4847
4848 return false;
4849 }
4850
4851 return true;
4852}
4853
4854// Returns true if TCO is possible between the callers and callees
4855// calling conventions.
4856 static bool
4857 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4858 CallingConv::ID CalleeCC) {
4859 // Tail calls are possible with fastcc and ccc.
4860 auto isTailCallableCC = [](CallingConv::ID CC) {
4861 return CC == CallingConv::C || CC == CallingConv::Fast;
4862 };
4863 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4864 return false;
4865
4866 // We can safely tail call both fastcc and ccc callees from a c calling
4867 // convention caller. If the caller is fastcc, we may have less stack space
4868 // than a non-fastcc caller with the same signature so disable tail-calls in
4869 // that case.
4870 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4871}
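// Summary (added, illustrative): C caller -> C or Fast callee is eligible,
// Fast -> Fast is eligible, but Fast -> C is rejected because the fastcc
// caller may have reserved less stack than a ccc caller of the same
// signature.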
4872
4873 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4874 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4875 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4876 const SmallVectorImpl<ISD::OutputArg> &Outs,
4877 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4878 bool isCalleeExternalSymbol) const {
4879 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4880
4881 if (DisableSCO && !TailCallOpt) return false;
4882
4883 // Variadic argument functions are not supported.
4884 if (isVarArg) return false;
4885
4886 // Check that the calling conventions are compatible for tco.
4887 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4888 return false;
4889
4890 // A caller with any byval parameter is not supported.
4891 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4892 return false;
4893
4894 // A callee with any byval parameter is not supported either.
4895 // Note: This is a quick work around, because in some cases, e.g.
4896 // caller's stack size > callee's stack size, we are still able to apply
4897 // sibling call optimization. For example, gcc is able to do SCO for caller1
4898 // in the following example, but not for caller2.
4899 // struct test {
4900 // long int a;
4901 // char ary[56];
4902 // } gTest;
4903 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4904 // b->a = v.a;
4905 // return 0;
4906 // }
4907 // void caller1(struct test a, struct test c, struct test *b) {
4908 // callee(gTest, b); }
4909 // void caller2(struct test *b) { callee(gTest, b); }
4910 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4911 return false;
4912
4913 // If callee and caller use different calling conventions, we cannot pass
4914 // parameters on stack since offsets for the parameter area may be different.
4915 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
4916 return false;
4917
4918 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4919 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4920 // callee potentially have different TOC bases then we cannot tail call since
4921 // we need to restore the TOC pointer after the call.
4922 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4923 // We cannot guarantee this for indirect calls or calls to external functions.
4924 // When PC-Relative addressing is used, the concept of the TOC is no longer
4925 // applicable so this check is not required.
4926 // Check first for indirect calls.
4927 if (!Subtarget.isUsingPCRelativeCalls() &&
4928 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
4929 return false;
4930
4931 // Check if we share the TOC base.
4932 if (!Subtarget.isUsingPCRelativeCalls() &&
4933 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
4934 return false;
4935
4936 // TCO allows altering callee ABI, so we don't have to check further.
4937 if (CalleeCC == CallingConv::Fast && TailCallOpt)
4938 return true;
4939
4940 if (DisableSCO) return false;
4941
4942 // If the callee uses the same argument list as the caller, we can apply
4943 // SCO in this case. If not, we need to check whether the callee needs
4944 // stack for passing arguments.
4945 // PC Relative tail calls may not have a CallBase.
4946 // If there is no CallBase we cannot verify if we have the same argument
4947 // list so assume that we don't have the same argument list.
4948 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
4949 needStackSlotPassParameters(Subtarget, Outs))
4950 return false;
4951 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4952 return false;
4953
4954 return true;
4955}
4956
4957/// IsEligibleForTailCallOptimization - Check whether the call is eligible
4958/// for tail call optimization. Targets which want to do tail call
4959/// optimization should implement this function.
4960 bool PPCTargetLowering::IsEligibleForTailCallOptimization(
4961 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4962 CallingConv::ID CallerCC, bool isVarArg,
4963 const SmallVectorImpl<ISD::InputArg> &Ins) const {
4964 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4965 return false;
4966
4967 // Variable argument functions are not supported.
4968 if (isVarArg)
4969 return false;
4970
4971 if (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast) {
4972 // Functions containing by val parameters are not supported.
4973 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4974 return false;
4975
4976 // Non-PIC/GOT tail calls are supported.
4977 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4978 return true;
4979
4980 // At the moment we can only do local tail calls (in same module, hidden
4981 // or protected) if we are generating PIC.
4982 if (CalleeGV)
4983 return CalleeGV->hasHiddenVisibility() ||
4984 CalleeGV->hasProtectedVisibility();
4985 }
4986
4987 return false;
4988}
4989
4990/// isCallCompatibleAddress - Return the immediate to use if the specified
4991/// 32-bit value is representable in the immediate field of a BxA instruction.
4994 if (!C) return nullptr;
4995
4996 int Addr = C->getZExtValue();
4997 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4998 SignExtend32<26>(Addr) != Addr)
4999 return nullptr; // Top 6 bits have to be sext of immediate.
5000
5001 return DAG
5002 .getConstant(
5003 (int)C->getZExtValue() >> 2, SDLoc(Op),
5004 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5005 .getNode();
5006}
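// Example (added, illustrative): Addr = 0x100 is word-aligned and fits the
// 26-bit signed field, so the returned constant is 0x100 >> 2 = 0x40; an
// unaligned Addr = 0x101 yields nullptr.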
5007
5008namespace {
5009
5010struct TailCallArgumentInfo {
5011 SDValue Arg;
5012 SDValue FrameIdxOp;
5013 int FrameIdx = 0;
5014
5015 TailCallArgumentInfo() = default;
5016};
5017
5018} // end anonymous namespace
5019
5020/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5022 SelectionDAG &DAG, SDValue Chain,
5025 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5026 SDValue Arg = TailCallArgs[i].Arg;
5027 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5028 int FI = TailCallArgs[i].FrameIdx;
5029 // Store relative to framepointer.
5030 MemOpChains.push_back(DAG.getStore(
5031 Chain, dl, Arg, FIN,
5032 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5033 }
5034}
5035
5036/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5037/// the appropriate stack slot for the tail call optimized function call.
5040 int SPDiff, const SDLoc &dl) {
5041 if (SPDiff) {
5042 // Calculate the new stack slot for the return address.
5043 MachineFunction &MF = DAG.getMachineFunction();
5044 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5045 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5046 bool isPPC64 = Subtarget.isPPC64();
5047 int SlotSize = isPPC64 ? 8 : 4;
5048 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5049 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5050 NewRetAddrLoc, true);
5051 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5052 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5053 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5054 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5055 }
5056 return Chain;
5057}
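// Example (added, illustrative): on 64-bit ELF, with SPDiff = -32 and a
// return save offset of 16, the return address is stored to a fixed object
// at -32 + 16 = -16 relative to the adjusted stack pointer.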
5058
5059/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5060/// the position of the argument.
5061 static void
5062 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5063 SDValue Arg, int SPDiff, unsigned ArgOffset,
5064 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5065 int Offset = ArgOffset + SPDiff;
5066 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5067 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5068 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5069 SDValue FIN = DAG.getFrameIndex(FI, VT);
5070 TailCallArgumentInfo Info;
5071 Info.Arg = Arg;
5072 Info.FrameIdxOp = FIN;
5073 Info.FrameIdx = FI;
5074 TailCallArguments.push_back(Info);
5075}
5076
5077/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5078/// stack slot. Returns the chain as result and the loaded frame pointers in
5079/// LROpOut/FPOpout. Used when tail calling.
5080SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5081 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5082 SDValue &FPOpOut, const SDLoc &dl) const {
5083 if (SPDiff) {
5084 // Load the LR and FP stack slot for later adjusting.
5085 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5086 LROpOut = getReturnAddrFrameIndex(DAG);
5087 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5088 Chain = SDValue(LROpOut.getNode(), 1);
5089 }
5090 return Chain;
5091}
5092
5093/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5094/// by "Src" to address "Dst" of size "Size". Alignment information is
5095/// specified by the specific parameter attribute. The copy will be passed as
5096/// a byval function parameter.
5097/// Sometimes what we are copying is the end of a larger object, the part that
5098/// does not fit in registers.
5100 SDValue Chain, ISD::ArgFlagsTy Flags,
5101 SelectionDAG &DAG, const SDLoc &dl) {
5102 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5103 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5104 Flags.getNonZeroByValAlign(), false, false, false,
5105 MachinePointerInfo(), MachinePointerInfo());
5106}
5107
5108/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5109/// tail calls.
5112 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5113 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5116 if (!isTailCall) {
5117 if (isVector) {
5118 SDValue StackPtr;
5119 if (isPPC64)
5120 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5121 else
5122 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5123 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5124 DAG.getConstant(ArgOffset, dl, PtrVT));
5125 }
5126 MemOpChains.push_back(
5127 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5128 // Calculate and remember argument location.
5129 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5130 TailCallArguments);
5131}
5132
5133 static void
5134 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5135 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5136 SDValue FPOp,
5137 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5138 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5139 // might overwrite each other in case of tail call optimization.
5140 SmallVector<SDValue, 8> MemOpChains2;
5141 // Do not flag preceding copytoreg stuff together with the following stuff.
5142 InGlue = SDValue();
5143 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5144 MemOpChains2, dl);
5145 if (!MemOpChains2.empty())
5146 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5147
5148 // Store the return address to the appropriate stack slot.
5149 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5150
5151 // Emit callseq_end just before tailcall node.
5152 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5153 InGlue = Chain.getValue(1);
5154}
5155
5156// Is this global address that of a function that can be called by name? (as
5157// opposed to something that must hold a descriptor for an indirect call).
5158static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5159 if (GV) {
5160 if (GV->isThreadLocal())
5161 return false;
5162
5163 return GV->getValueType()->isFunctionTy();
5164 }
5165
5166 return false;
5167}
5168
5169SDValue PPCTargetLowering::LowerCallResult(
5170 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5171 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5172 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5173 SmallVector<CCValAssign, 16> RVLocs;
5174 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5175 *DAG.getContext());
5176
5177 CCRetInfo.AnalyzeCallResult(
5178 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5179 ? RetCC_PPC_Cold
5180 : RetCC_PPC);
5181
5182 // Copy all of the result registers out of their specified physreg.
5183 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5184 CCValAssign &VA = RVLocs[i];
5185 assert(VA.isRegLoc() && "Can only return in registers!");
5186
5187 SDValue Val;
5188
5189 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5190 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5191 InGlue);
5192 Chain = Lo.getValue(1);
5193 InGlue = Lo.getValue(2);
5194 VA = RVLocs[++i]; // skip ahead to next loc
5195 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5196 InGlue);
5197 Chain = Hi.getValue(1);
5198 InGlue = Hi.getValue(2);
5199 if (!Subtarget.isLittleEndian())
5200 std::swap (Lo, Hi);
5201 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5202 } else {
5203 Val = DAG.getCopyFromReg(Chain, dl,
5204 VA.getLocReg(), VA.getLocVT(), InGlue);
5205 Chain = Val.getValue(1);
5206 InGlue = Val.getValue(2);
5207 }
5208
5209 switch (VA.getLocInfo()) {
5210 default: llvm_unreachable("Unknown loc info!");
5211 case CCValAssign::Full: break;
5212 case CCValAssign::AExt:
5213 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5214 break;
5215 case CCValAssign::ZExt:
5216 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5217 DAG.getValueType(VA.getValVT()));
5218 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5219 break;
5220 case CCValAssign::SExt:
5221 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5222 DAG.getValueType(VA.getValVT()));
5223 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5224 break;
5225 }
5226
5227 InVals.push_back(Val);
5228 }
5229
5230 return Chain;
5231}
5232
5233 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5234 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5235 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5236 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5237
5238 // PatchPoint calls are not indirect.
5239 if (isPatchPoint)
5240 return false;
5241
5242 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5243 return false;
5244
5245 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
5246 // because the immediate function pointer points to a descriptor instead of
5247 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5248 // pointer immediate points to the global entry point, while the BLA would
5249 // need to jump to the local entry point (see rL211174).
5250 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5251 isBLACompatibleAddress(Callee, DAG))
5252 return false;
5253
5254 return true;
5255}
5256
5257// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5258static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5259 return Subtarget.isAIXABI() ||
5260 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5261}
5262
5263 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5264 const Function &Caller, const SDValue &Callee,
5265 const PPCSubtarget &Subtarget,
5266 const TargetMachine &TM,
5267 bool IsStrictFPCall = false) {
5268 if (CFlags.IsTailCall)
5269 return PPCISD::TC_RETURN;
5270
5271 unsigned RetOpc = 0;
5272 // This is a call through a function pointer.
5273 if (CFlags.IsIndirect) {
5274 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5275 // indirect calls. The save of the caller's TOC pointer to the stack will be
5276 // inserted into the DAG as part of call lowering. The restore of the TOC
5277 // pointer is modeled by using a pseudo instruction for the call opcode that
5278 // represents the 2 instruction sequence of an indirect branch and link,
5279 // immediately followed by a load of the TOC pointer from the stack save
5280 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5281 // as it is not saved or used.
5282 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5283 : PPCISD::BCTRL;
5284 } else if (Subtarget.isUsingPCRelativeCalls()) {
5285 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5286 RetOpc = PPCISD::CALL_NOTOC;
5287 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5288 // The ABIs that maintain a TOC pointer across calls need to have a nop
5289 // immediately following the call instruction if the caller and callee may
5290 // have different TOC bases. At link time if the linker determines the calls
5291 // may not share a TOC base, the call is redirected to a trampoline inserted
5292 // by the linker. The trampoline will (among other things) save the caller's
5293 // TOC pointer at an ABI designated offset in the linkage area and the
5294 // linker will rewrite the nop to be a load of the TOC pointer from the
5295 // linkage area into gpr2.
5296 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5297 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5298 RetOpc =
5299 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5300 } else
5301 RetOpc = PPCISD::CALL;
5302 if (IsStrictFPCall) {
5303 switch (RetOpc) {
5304 default:
5305 llvm_unreachable("Unknown call opcode");
5306 case PPCISD::BCTRL_LOAD_TOC:
5307 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5308 break;
5309 case PPCISD::BCTRL:
5310 RetOpc = PPCISD::BCTRL_RM;
5311 break;
5312 case PPCISD::CALL_NOTOC:
5313 RetOpc = PPCISD::CALL_NOTOC_RM;
5314 break;
5315 case PPCISD::CALL:
5316 RetOpc = PPCISD::CALL_RM;
5317 break;
5318 case PPCISD::CALL_NOP:
5319 RetOpc = PPCISD::CALL_NOP_RM;
5320 break;
5321 }
5322 }
5323 return RetOpc;
5324}
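// Summary of the mapping (added, illustrative): a direct call known to share
// the TOC lowers to CALL; one that may not lowers to CALL_NOP (leaving room
// for the TOC-restore nop); an indirect call needing a TOC save/restore uses
// BCTRL_LOAD_TOC; PC-relative callers use CALL_NOTOC; strict-FP variants map
// to the corresponding *_RM opcodes.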
5325
5326 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5327 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5328 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5329 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5330 return SDValue(Dest, 0);
5331
5332 // Returns true if the callee is local, and false otherwise.
5333 auto isLocalCallee = [&]() {
5334 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5335 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5336 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5337
5338 return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5339 !isa_and_nonnull<GlobalIFunc>(GV);
5340 };
5341
5342 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5343 // a static relocation model causes some versions of GNU LD (2.17.50, at
5344 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5345 // built with secure-PLT.
5346 bool UsePlt =
5347 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5348 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5349
5350 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5351 const TargetMachine &TM = Subtarget.getTargetMachine();
5352 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5353 MCSymbolXCOFF *S =
5354 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5355
5356 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5357 return DAG.getMCSymbol(S, PtrVT);
5358 };
5359
5360 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5361 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5362 if (isFunctionGlobalAddress(GV)) {
5363 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5364
5365 if (Subtarget.isAIXABI()) {
5366 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5367 return getAIXFuncEntryPointSymbolSDNode(GV);
5368 }
5369 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5370 UsePlt ? PPCII::MO_PLT : 0);
5371 }
5372
5373 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5374 const char *SymName = S->getSymbol();
5375 if (Subtarget.isAIXABI()) {
5376 // If there exists a user-declared function whose name is the same as the
5377 // ExternalSymbol's, then we pick up the user-declared version.
5378 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5379 if (const Function *F =
5380 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5381 return getAIXFuncEntryPointSymbolSDNode(F);
5382
5383 // On AIX, direct function calls reference the symbol for the function's
5384 // entry point, which is named by prepending a "." before the function's
5385 // C-linkage name. A Qualname is returned here because an external
5386 // function entry point is a csect with XTY_ER property.
5387 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5388 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5389 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5390 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5391 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5392 return Sec->getQualNameSymbol();
5393 };
5394
5395 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5396 }
5397 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5398 UsePlt ? PPCII::MO_PLT : 0);
5399 }
5400
5401 // No transformation needed.
5402 assert(Callee.getNode() && "What no callee?");
5403 return Callee;
5404}
5405
5406 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5407 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5408 "Expected a CALLSEQ_STARTSDNode.");
5409
5410 // The last operand is the chain, except when the node has glue. If the node
5411 // has glue, then the last operand is the glue, and the chain is the second
5412 // last operand.
5413 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5414 if (LastValue.getValueType() != MVT::Glue)
5415 return LastValue;
5416
5417 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5418}
5419
5420// Creates the node that moves a functions address into the count register
5421// to prepare for an indirect call instruction.
5423 SDValue &Glue, SDValue &Chain,
5424 const SDLoc &dl) {
5425 SDValue MTCTROps[] = {Chain, Callee, Glue};
5426 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5427 Chain = DAG.getNode(PPCISD::MTCTR, dl, ArrayRef(ReturnTypes, 2),
5428 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5429 // The glue is the second value produced.
5430 Glue = Chain.getValue(1);
5431}
5432
5433 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5434 SDValue &Glue, SDValue &Chain,
5435 SDValue CallSeqStart,
5436 const CallBase *CB, const SDLoc &dl,
5437 bool hasNest,
5438 const PPCSubtarget &Subtarget) {
5439 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5440 // entry point, but to the function descriptor (the function entry point
5441 // address is part of the function descriptor though).
5442 // The function descriptor is a three doubleword structure with the
5443 // following fields: function entry point, TOC base address and
5444 // environment pointer.
5445 // Thus for a call through a function pointer, the following actions need
5446 // to be performed:
5447 // 1. Save the TOC of the caller in the TOC save area of its stack
5448 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5449 // 2. Load the address of the function entry point from the function
5450 // descriptor.
5451 // 3. Load the TOC of the callee from the function descriptor into r2.
5452 // 4. Load the environment pointer from the function descriptor into
5453 // r11.
5454 // 5. Branch to the function entry point address.
5455 // 6. On return of the callee, the TOC of the caller needs to be
5456 // restored (this is done in FinishCall()).
5457 //
5458 // The loads are scheduled at the beginning of the call sequence, and the
5459 // register copies are flagged together to ensure that no other
5460 // operations can be scheduled in between. E.g. without flagging the
5461 // copies together, a TOC access in the caller could be scheduled between
5462 // the assignment of the callee TOC and the branch to the callee, which leads
5463 // to incorrect code.
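// Machine-code sketch of that sequence (added, illustrative; 0/8/16 are the
// 64-bit descriptor offsets used below):
//   std 2, <toc-save>(1)   ; save caller's TOC during call lowering
//   ld 12, 0(<desc>)       ; load function entry point
//   ld 2, 8(<desc>)        ; load callee's TOC anchor
//   ld 11, 16(<desc>)      ; load environment pointer
//   mtctr 12
//   bctrl
//   ld 2, <toc-save>(1)    ; restore caller's TOC after return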
5464
5465 // Start by loading the function address from the descriptor.
5466 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5467 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5468 ? (MachineMemOperand::MODereferenceable |
5469 MachineMemOperand::MOInvariant)
5470 : MachineMemOperand::MONone;
5471
5472 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5473
5474 // Registers used in building the DAG.
5475 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5476 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5477
5478 // Offsets of descriptor members.
5479 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5480 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5481
5482 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5483 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5484
5485 // One load for the function's entry point address.
5486 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5487 Alignment, MMOFlags);
5488
5489 // One for loading the TOC anchor for the module that contains the called
5490 // function.
5491 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5492 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5493 SDValue TOCPtr =
5494 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5495 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5496
5497 // One for loading the environment pointer.
5498 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5499 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5500 SDValue LoadEnvPtr =
5501 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5502 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5503
5504
5505 // Then copy the newly loaded TOC anchor to the TOC pointer.
5506 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5507 Chain = TOCVal.getValue(0);
5508 Glue = TOCVal.getValue(1);
5509
5510 // If the function call has an explicit 'nest' parameter, it takes the
5511 // place of the environment pointer.
5512 assert((!hasNest || !Subtarget.isAIXABI()) &&
5513 "Nest parameter is not supported on AIX.");
5514 if (!hasNest) {
5515 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5516 Chain = EnvVal.getValue(0);
5517 Glue = EnvVal.getValue(1);
5518 }
5519
5520 // The rest of the indirect call sequence is the same as the non-descriptor
5521 // DAG.
5522 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5523}
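// Editor's note (annotation): the 64-bit ELFv1 function descriptor read by the
// three loads above can be pictured as the following C struct. This is an
// illustrative sketch only; the backend addresses the fields through
// descriptorTOCAnchorOffset() and descriptorEnvironmentPointerOffset() rather
// than through a named type:
//
//   struct FunctionDescriptor {
//     void *EntryPoint;  // offset 0:  address of the first instruction
//     void *TOCAnchor;   // offset 8:  callee's TOC base, copied into r2
//     void *Environment; // offset 16: environment pointer, copied into r11
//   };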
5524
5525static void
5526 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5527 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5528 SelectionDAG &DAG,
5529 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5530 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5531 const PPCSubtarget &Subtarget) {
5532 const bool IsPPC64 = Subtarget.isPPC64();
5533 // MVT for a general purpose register.
5534 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5535
5536 // First operand is always the chain.
5537 Ops.push_back(Chain);
5538
5539 // If it's a direct call pass the callee as the second operand.
5540 if (!CFlags.IsIndirect)
5541 Ops.push_back(Callee);
5542 else {
5543 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5544
5545 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5546 // on the stack (this would have been done in `LowerCall_64SVR4` or
5547 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5548 // represents both the indirect branch and a load that restores the TOC
5549 // pointer from the linkage area. The operand for the TOC restore is an add
5550 // of the TOC save offset to the stack pointer. This must be the second
5551 // operand: after the chain input but before any other variadic arguments.
5552 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5553 // saved or used.
5554 if (isTOCSaveRestoreRequired(Subtarget)) {
5555 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5556
5557 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5558 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5559 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5560 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5561 Ops.push_back(AddTOC);
5562 }
5563
5564 // Add the register used for the environment pointer.
5565 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5566 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5567 RegVT));
5568
5569
5570 // Add CTR register as callee so a bctr can be emitted later.
5571 if (CFlags.IsTailCall)
5572 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5573 }
5574
5575 // If this is a tail call add stack pointer delta.
5576 if (CFlags.IsTailCall)
5577 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5578
5579 // Add argument registers to the end of the list so that they are known live
5580 // into the call.
5581 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5582 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5583 RegsToPass[i].second.getValueType()));
5584
5585 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5586 // no way to mark dependencies as implicit here.
5587 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5588 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5589 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5590 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5591
5592 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5593 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5594 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5595
5596 // Add a register mask operand representing the call-preserved registers.
5597 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5598 const uint32_t *Mask =
5599 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5600 assert(Mask && "Missing call preserved mask for calling convention");
5601 Ops.push_back(DAG.getRegisterMask(Mask));
5602
5603 // If the glue is valid, it is the last operand.
5604 if (Glue.getNode())
5605 Ops.push_back(Glue);
5606}
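// Editor's note (annotation): for a simple direct call on a 64-bit TOC-based
// target, the operand list assembled above ends up roughly as
//
//   { Chain, TargetGlobalAddress(callee),
//     X3, X4, ...,      // argument registers, marked live into the call
//     X2,               // implicit TOC pointer use
//     RegisterMask, Glue }
//
// This is a sketch; the exact contents depend on CFlags and the subtarget, as
// the branches above show.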
5607
5608SDValue PPCTargetLowering::FinishCall(
5609 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5610 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5611 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5612 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5613 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5614
5615 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5616 Subtarget.isAIXABI())
5617 setUsesTOCBasePtr(DAG);
5618
5619 unsigned CallOpc =
5620 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5621 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5622
5623 if (!CFlags.IsIndirect)
5624 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5625 else if (Subtarget.usesFunctionDescriptors())
5626 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5627 dl, CFlags.HasNest, Subtarget);
5628 else
5629 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5630
5631 // Build the operand list for the call instruction.
5632 SmallVector<SDValue, 8> Ops;
5633 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5634 SPDiff, Subtarget);
5635
5636 // Emit tail call.
5637 if (CFlags.IsTailCall) {
5638 // Indirect tail calls when using PC Relative calls do not have the same
5639 // constraints.
5640 assert(((Callee.getOpcode() == ISD::Register &&
5641 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5642 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5643 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5644 isa<ConstantSDNode>(Callee) ||
5645 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5646 "Expecting a global address, external symbol, absolute value, "
5647 "register or an indirect tail call when PC Relative calls are "
5648 "used.");
5649 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5651 "Unexpected call opcode for a tail call.");
5653 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5654 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5655 return Ret;
5656 }
5657
5658 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5659 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5660 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5661 Glue = Chain.getValue(1);
5662
5663 // When performing tail call optimization the callee pops its arguments off
5664 // the stack. Account for this here so these bytes can be pushed back on in
5665 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5666 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5667 getTargetMachine().Options.GuaranteedTailCallOpt)
5668 ? NumBytes
5669 : 0;
5670
5671 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5672 Glue = Chain.getValue(1);
5673
5674 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5675 DAG, InVals);
5676}
5677
5678 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5679 CallingConv::ID CalleeCC = CB->getCallingConv();
5680 const Function *CallerFunc = CB->getCaller();
5681 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5682 const Function *CalleeFunc = CB->getCalledFunction();
5683 if (!CalleeFunc)
5684 return false;
5685 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5686
5687 SmallVector<ISD::OutputArg, 2> Outs;
5688 SmallVector<ISD::InputArg, 2> Ins;
5689
5690 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5691 CalleeFunc->getAttributes(), Outs, *this,
5692 CalleeFunc->getParent()->getDataLayout());
5693
5694 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5695 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5696 false /*isCalleeExternalSymbol*/);
5697}
5698
5699bool PPCTargetLowering::isEligibleForTCO(
5700 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5701 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5702 const SmallVectorImpl<ISD::OutputArg> &Outs,
5703 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5704 bool isCalleeExternalSymbol) const {
5705 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5706 return false;
5707
5708 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5709 return IsEligibleForTailCallOptimization_64SVR4(
5710 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5711 isCalleeExternalSymbol);
5712 else
5713 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5714 isVarArg, Ins);
5715}
5716
5717SDValue
5718PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5719 SmallVectorImpl<SDValue> &InVals) const {
5720 SelectionDAG &DAG = CLI.DAG;
5721 SDLoc &dl = CLI.DL;
5722 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5723 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5724 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5725 SDValue Chain = CLI.Chain;
5726 SDValue Callee = CLI.Callee;
5727 bool &isTailCall = CLI.IsTailCall;
5728 CallingConv::ID CallConv = CLI.CallConv;
5729 bool isVarArg = CLI.IsVarArg;
5730 bool isPatchPoint = CLI.IsPatchPoint;
5731 const CallBase *CB = CLI.CB;
5732
5733 if (isTailCall) {
5734 MachineFunction &MF = DAG.getMachineFunction();
5735 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5736 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5737 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5738 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5739
5740 isTailCall =
5741 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5742 &(MF.getFunction()), IsCalleeExternalSymbol);
5743 if (isTailCall) {
5744 ++NumTailCalls;
5745 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5746 ++NumSiblingCalls;
5747
5748 // PC Relative calls no longer guarantee that the callee is a Global
5749 // Address Node. The callee could be an indirect tail call in which
5750 // case the SDValue for the callee could be a load (to load the address
5751 // of a function pointer) or it may be a register copy (to move the
5752 // address of the callee from a function parameter into a virtual
5753 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5754 assert((Subtarget.isUsingPCRelativeCalls() ||
5755 isa<GlobalAddressSDNode>(Callee)) &&
5756 "Callee should be an llvm::Function object.");
5757
5758 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5759 << "\nTCO callee: ");
5760 LLVM_DEBUG(Callee.dump());
5761 }
5762 }
5763
5764 if (!isTailCall && CB && CB->isMustTailCall())
5765 report_fatal_error("failed to perform tail call elimination on a call "
5766 "site marked musttail");
5767
5768 // When long calls (i.e. indirect calls) are always used, calls are always
5769 // made via function pointer. If we have a function name, first translate it
5770 // into a pointer.
5771 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5772 !isTailCall)
5773 Callee = LowerGlobalAddress(Callee, DAG);
5774
5775 CallFlags CFlags(
5776 CallConv, isTailCall, isVarArg, isPatchPoint,
5777 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5778 // hasNest
5779 Subtarget.is64BitELFABI() &&
5780 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5781 CLI.NoMerge);
5782
5783 if (Subtarget.isAIXABI())
5784 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5785 InVals, CB);
5786
5787 assert(Subtarget.isSVR4ABI());
5788 if (Subtarget.isPPC64())
5789 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5790 InVals, CB);
5791 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5792 InVals, CB);
5793}
5794
5795SDValue PPCTargetLowering::LowerCall_32SVR4(
5796 SDValue Chain, SDValue Callee, CallFlags CFlags,
5797 const SmallVectorImpl<ISD::OutputArg> &Outs,
5798 const SmallVectorImpl<SDValue> &OutVals,
5799 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5800 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5801 const CallBase *CB) const {
5802 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5803 // of the 32-bit SVR4 ABI stack frame layout.
5804
5805 const CallingConv::ID CallConv = CFlags.CallConv;
5806 const bool IsVarArg = CFlags.IsVarArg;
5807 const bool IsTailCall = CFlags.IsTailCall;
5808
5809 assert((CallConv == CallingConv::C ||
5810 CallConv == CallingConv::Cold ||
5811 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5812
5813 const Align PtrAlign(4);
5814
5815 MachineFunction &MF = DAG.getMachineFunction();
5816
5817 // Mark this function as potentially containing a function that contains a
5818 // tail call. As a consequence the frame pointer will be used for dynamic
5819 // alloca and for restoring the caller's stack pointer in this function's
5820 // epilog. This is done because a tail-called function might overwrite the
5821 // value in this function's (MF) stack pointer stack slot 0(SP).
5822 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5823 CallConv == CallingConv::Fast)
5824 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5825
5826 // Count how many bytes are to be pushed on the stack, including the linkage
5827 // area, parameter list area and the part of the local variable space which
5828 // contains copies of aggregates which are passed by value.
5829
5830 // Assign locations to all of the outgoing arguments.
5831 SmallVector<CCValAssign, 16> ArgLocs;
5832 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5833
5834 // Reserve space for the linkage area on the stack.
5835 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5836 PtrAlign);
5837 if (useSoftFloat())
5838 CCInfo.PreAnalyzeCallOperands(Outs);
5839
5840 if (IsVarArg) {
5841 // Handle fixed and variable vector arguments differently.
5842 // Fixed vector arguments go into registers as long as registers are
5843 // available. Variable vector arguments always go into memory.
5844 unsigned NumArgs = Outs.size();
5845
5846 for (unsigned i = 0; i != NumArgs; ++i) {
5847 MVT ArgVT = Outs[i].VT;
5848 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5849 bool Result;
5850
5851 if (Outs[i].IsFixed) {
5852 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5853 CCInfo);
5854 } else {
5855 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5856 ArgFlags, CCInfo);
5857 }
5858
5859 if (Result) {
5860#ifndef NDEBUG
5861 errs() << "Call operand #" << i << " has unhandled type "
5862 << ArgVT << "\n";
5863#endif
5864 llvm_unreachable(nullptr);
5865 }
5866 }
5867 } else {
5868 // All arguments are treated the same.
5869 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5870 }
5871 CCInfo.clearWasPPCF128();
5872
5873 // Assign locations to all of the outgoing aggregate by value arguments.
5874 SmallVector<CCValAssign, 16> ByValArgLocs;
5875 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5876
5877 // Reserve stack space for the allocations in CCInfo.
5878 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5879
5880 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5881
5882 // Size of the linkage area, parameter list area and the part of the local
5883 // space variable where copies of aggregates which are passed by value are
5884 // stored.
5885 unsigned NumBytes = CCByValInfo.getStackSize();
5886
5887 // Calculate by how many bytes the stack has to be adjusted in case of tail
5888 // call optimization.
5889 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5890
5891 // Adjust the stack pointer for the new arguments...
5892 // These operations are automatically eliminated by the prolog/epilog pass
5893 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5894 SDValue CallSeqStart = Chain;
5895
5896 // Load the return address and frame pointer so they can be moved somewhere else
5897 // later.
5898 SDValue LROp, FPOp;
5899 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5900
5901 // Set up a copy of the stack pointer for use loading and storing any
5902 // arguments that may not fit in the registers available for argument
5903 // passing.
5904 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5905
5906 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5907 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5908 SmallVector<SDValue, 8> MemOpChains;
5909
5910 bool seenFloatArg = false;
5911 // Walk the register/memloc assignments, inserting copies/loads.
5912 // i - Tracks the index into the list of registers allocated for the call
5913 // RealArgIdx - Tracks the index into the list of actual function arguments
5914 // j - Tracks the index into the list of byval arguments
5915 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5916 i != e;
5917 ++i, ++RealArgIdx) {
5918 CCValAssign &VA = ArgLocs[i];
5919 SDValue Arg = OutVals[RealArgIdx];
5920 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5921
5922 if (Flags.isByVal()) {
5923 // Argument is an aggregate which is passed by value, thus we need to
5924 // create a copy of it in the local variable space of the current stack
5925 // frame (which is the stack frame of the caller) and pass the address of
5926 // this copy to the callee.
5927 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5928 CCValAssign &ByValVA = ByValArgLocs[j++];
5929 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5930
5931 // Memory reserved in the local variable space of the caller's stack frame.
5932 unsigned LocMemOffset = ByValVA.getLocMemOffset();
5933
5934 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5935 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5936 StackPtr, PtrOff);
5937
5938 // Create a copy of the argument in the local area of the current
5939 // stack frame.
5940 SDValue MemcpyCall =
5941 CreateCopyOfByValArgument(Arg, PtrOff,
5942 CallSeqStart.getNode()->getOperand(0),
5943 Flags, DAG, dl);
5944
5945 // This must go outside the CALLSEQ_START..END.
5946 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5947 SDLoc(MemcpyCall));
5948 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5949 NewCallSeqStart.getNode());
5950 Chain = CallSeqStart = NewCallSeqStart;
5951
5952 // Pass the address of the aggregate copy on the stack either in a
5953 // physical register or in the parameter list area of the current stack
5954 // frame to the callee.
5955 Arg = PtrOff;
5956 }
5957
5958 // When useCRBits() is true, there can be i1 arguments.
5959 // It is because getRegisterType(MVT::i1) => MVT::i1,
5960 // and for other integer types getRegisterType() => MVT::i32.
5961 // Extend i1 and ensure callee will get i32.
5962 if (Arg.getValueType() == MVT::i1)
5963 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5964 dl, MVT::i32, Arg);
5965
5966 if (VA.isRegLoc()) {
5967 seenFloatArg |= VA.getLocVT().isFloatingPoint();
5968 // Put argument in a physical register.
5969 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5970 bool IsLE = Subtarget.isLittleEndian();
5971 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5972 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5973 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5974 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5975 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5976 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5977 SVal.getValue(0)));
5978 } else
5979 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5980 } else {
5981 // Put argument in the parameter list area of the current stack frame.
5982 assert(VA.isMemLoc());
5983 unsigned LocMemOffset = VA.getLocMemOffset();
5984
5985 if (!IsTailCall) {
5986 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5987 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5988 StackPtr, PtrOff);
5989
5990 MemOpChains.push_back(
5991 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5992 } else {
5993 // Calculate and remember argument location.
5994 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5995 TailCallArguments);
5996 }
5997 }
5998 }
5999
6000 if (!MemOpChains.empty())
6001 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6002
6003 // Build a sequence of copy-to-reg nodes chained together with token chain
6004 // and flag operands which copy the outgoing args into the appropriate regs.
6005 SDValue InGlue;
6006 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6007 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6008 RegsToPass[i].second, InGlue);
6009 InGlue = Chain.getValue(1);
6010 }
6011
6012 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6013 // registers.
6014 if (IsVarArg) {
6015 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6016 SDValue Ops[] = { Chain, InGlue };
6017
6018 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6019 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6020
6021 InGlue = Chain.getValue(1);
6022 }
6023
6024 if (IsTailCall)
6025 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6026 TailCallArguments);
6027
6028 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6029 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6030}
6031
6032// Copy an argument into memory, being careful to do this outside the
6033// call sequence for the call to which the argument belongs.
6034SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6035 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6036 SelectionDAG &DAG, const SDLoc &dl) const {
6037 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6038 CallSeqStart.getNode()->getOperand(0),
6039 Flags, DAG, dl);
6040 // The MEMCPY must go outside the CALLSEQ_START..END.
6041 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6042 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6043 SDLoc(MemcpyCall));
6044 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6045 NewCallSeqStart.getNode());
6046 return NewCallSeqStart;
6047}
6048
6049SDValue PPCTargetLowering::LowerCall_64SVR4(
6050 SDValue Chain, SDValue Callee, CallFlags CFlags,
6051 const SmallVectorImpl<ISD::OutputArg> &Outs,
6052 const SmallVectorImpl<SDValue> &OutVals,
6053 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6054 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6055 const CallBase *CB) const {
6056 bool isELFv2ABI = Subtarget.isELFv2ABI();
6057 bool isLittleEndian = Subtarget.isLittleEndian();
6058 unsigned NumOps = Outs.size();
6059 bool IsSibCall = false;
6060 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6061
6062 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6063 unsigned PtrByteSize = 8;
6064
6065 MachineFunction &MF = DAG.getMachineFunction();
6066
6067 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6068 IsSibCall = true;
6069
6070 // Mark this function as potentially containing a function that contains a
6071 // tail call. As a consequence the frame pointer will be used for dynamic
6072 // alloca and for restoring the caller's stack pointer in this function's
6073 // epilog. This is done because a tail-called function might overwrite the
6074 // value in this function's (MF) stack pointer stack slot 0(SP).
6075 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6076 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6077
6078 assert(!(IsFastCall && CFlags.IsVarArg) &&
6079 "fastcc not supported on varargs functions");
6080
6081 // Count how many bytes are to be pushed on the stack, including the linkage
6082 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6083 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6084 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6085 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6086 unsigned NumBytes = LinkageSize;
6087 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6088
6089 static const MCPhysReg GPR[] = {
6090 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6091 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6092 };
6093 static const MCPhysReg VR[] = {
6094 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6095 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6096 };
6097
6098 const unsigned NumGPRs = std::size(GPR);
6099 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6100 const unsigned NumVRs = std::size(VR);
6101
6102 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6103 // can be passed to the callee in registers.
6104 // For the fast calling convention, there is another check below.
6105 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6106 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6107 if (!HasParameterArea) {
6108 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6109 unsigned AvailableFPRs = NumFPRs;
6110 unsigned AvailableVRs = NumVRs;
6111 unsigned NumBytesTmp = NumBytes;
6112 for (unsigned i = 0; i != NumOps; ++i) {
6113 if (Outs[i].Flags.isNest()) continue;
6114 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6115 PtrByteSize, LinkageSize, ParamAreaSize,
6116 NumBytesTmp, AvailableFPRs, AvailableVRs))
6117 HasParameterArea = true;
6118 }
6119 }
6120
6121 // When using the fast calling convention, we don't provide backing for
6122 // arguments that will be in registers.
6123 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6124
6125 // Avoid allocating parameter area for fastcc functions if all the arguments
6126 // can be passed in the registers.
6127 if (IsFastCall)
6128 HasParameterArea = false;
6129
6130 // Add up all the space actually used.
6131 for (unsigned i = 0; i != NumOps; ++i) {
6132 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6133 EVT ArgVT = Outs[i].VT;
6134 EVT OrigVT = Outs[i].ArgVT;
6135
6136 if (Flags.isNest())
6137 continue;
6138
6139 if (IsFastCall) {
6140 if (Flags.isByVal()) {
6141 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6142 if (NumGPRsUsed > NumGPRs)
6143 HasParameterArea = true;
6144 } else {
6145 switch (ArgVT.getSimpleVT().SimpleTy) {
6146 default: llvm_unreachable("Unexpected ValueType for argument!");
6147 case MVT::i1:
6148 case MVT::i32:
6149 case MVT::i64:
6150 if (++NumGPRsUsed <= NumGPRs)
6151 continue;
6152 break;
6153 case MVT::v4i32:
6154 case MVT::v8i16:
6155 case MVT::v16i8:
6156 case MVT::v2f64:
6157 case MVT::v2i64:
6158 case MVT::v1i128:
6159 case MVT::f128:
6160 if (++NumVRsUsed <= NumVRs)
6161 continue;
6162 break;
6163 case MVT::v4f32:
6164 if (++NumVRsUsed <= NumVRs)
6165 continue;
6166 break;
6167 case MVT::f32:
6168 case MVT::f64:
6169 if (++NumFPRsUsed <= NumFPRs)
6170 continue;
6171 break;
6172 }
6173 HasParameterArea = true;
6174 }
6175 }
6176
6177 /* Respect alignment of argument on the stack. */
6178 auto Alignment =
6179 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6180 NumBytes = alignTo(NumBytes, Alignment);
6181
6182 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6183 if (Flags.isInConsecutiveRegsLast())
6184 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6185 }
6186
6187 unsigned NumBytesActuallyUsed = NumBytes;
6188
6189 // In the old ELFv1 ABI,
6190 // the prolog code of the callee may store up to 8 GPR argument registers to
6191 // the stack, allowing va_start to index over them in memory if it is varargs.
6192 // Because we cannot tell if this is needed on the caller side, we have to
6193 // conservatively assume that it is needed. As such, make sure we have at
6194 // least enough stack space for the caller to store the 8 GPRs.
6195 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6196 // really requires memory operands, e.g. a vararg function.
6197 if (HasParameterArea)
6198 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6199 else
6200 NumBytes = LinkageSize;
6201
6202 // Tail call needs the stack to be aligned.
6203 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6204 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6205
6206 int SPDiff = 0;
6207
6208 // Calculate by how many bytes the stack has to be adjusted in case of tail
6209 // call optimization.
6210 if (!IsSibCall)
6211 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6212
6213 // To protect arguments on the stack from being clobbered in a tail call,
6214 // force all the loads to happen before doing any other lowering.
6215 if (CFlags.IsTailCall)
6216 Chain = DAG.getStackArgumentTokenFactor(Chain);
6217
6218 // Adjust the stack pointer for the new arguments...
6219 // These operations are automatically eliminated by the prolog/epilog pass
6220 if (!IsSibCall)
6221 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6222 SDValue CallSeqStart = Chain;
6223
6224 // Load the return address and frame pointer so they can be moved somewhere else
6225 // later.
6226 SDValue LROp, FPOp;
6227 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6228
6229 // Set up a copy of the stack pointer for use loading and storing any
6230 // arguments that may not fit in the registers available for argument
6231 // passing.
6232 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6233
6234 // Figure out which arguments are going to go in registers, and which in
6235 // memory. Also, if this is a vararg function, floating point operations
6236 // must be stored to our stack, and loaded into integer regs as well, if
6237 // any integer regs are available for argument passing.
6238 unsigned ArgOffset = LinkageSize;
6239
6240 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6241 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6242
6243 SmallVector<SDValue, 8> MemOpChains;
6244 for (unsigned i = 0; i != NumOps; ++i) {
6245 SDValue Arg = OutVals[i];
6246 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6247 EVT ArgVT = Outs[i].VT;
6248 EVT OrigVT = Outs[i].ArgVT;
6249
6250 // PtrOff will be used to store the current argument to the stack if a
6251 // register cannot be found for it.
6252 SDValue PtrOff;
6253
6254 // We re-align the argument offset for each argument, except when using the
6255 // fast calling convention, when we need to make sure we do that only when
6256 // we'll actually use a stack slot.
6257 auto ComputePtrOff = [&]() {
6258 /* Respect alignment of argument on the stack. */
6259 auto Alignment =
6261 ArgOffset = alignTo(ArgOffset, Alignment);
6262
6263 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6264
6265 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6266 };
6267
6268 if (!IsFastCall) {
6269 ComputePtrOff();
6270
6271 /* Compute GPR index associated with argument offset. */
6272 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6273 GPR_idx = std::min(GPR_idx, NumGPRs);
6274 }
6275
6276 // Promote integers to 64-bit values.
6277 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6278 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6279 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6280 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6281 }
6282
6283 // FIXME memcpy is used way more than necessary. Correctness first.
6284 // Note: "by value" is code for passing a structure by value, not
6285 // basic types.
6286 if (Flags.isByVal()) {
6287 // Note: Size includes alignment padding, so
6288 // struct x { short a; char b; }
6289 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6290 // These are the proper values we need for right-justifying the
6291 // aggregate in a parameter register.
6292 unsigned Size = Flags.getByValSize();
6293
6294 // An empty aggregate parameter takes up no storage and no
6295 // registers.
6296 if (Size == 0)
6297 continue;
6298
6299 if (IsFastCall)
6300 ComputePtrOff();
6301
6302 // All aggregates smaller than 8 bytes must be passed right-justified.
6303 if (Size==1 || Size==2 || Size==4) {
6304 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6305 if (GPR_idx != NumGPRs) {
6306 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6307 MachinePointerInfo(), VT);
6308 MemOpChains.push_back(Load.getValue(1));
6309 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6310
6311 ArgOffset += PtrByteSize;
6312 continue;
6313 }
6314 }
6315
6316 if (GPR_idx == NumGPRs && Size < 8) {
6317 SDValue AddPtr = PtrOff;
6318 if (!isLittleEndian) {
6319 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6320 PtrOff.getValueType());
6321 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6322 }
6323 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6324 CallSeqStart,
6325 Flags, DAG, dl);
6326 ArgOffset += PtrByteSize;
6327 continue;
6328 }
6329 // Copy the object to the parameter save area if it cannot be entirely passed
6330 // by registers.
6331 // FIXME: we only need to copy the parts which need to be passed in
6332 // parameter save area. For the parts passed by registers, we don't need
6333 // to copy them to the stack although we need to allocate space for them
6334 // in parameter save area.
6335 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6336 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6337 CallSeqStart,
6338 Flags, DAG, dl);
6339
6340 // When a register is available, pass a small aggregate right-justified.
6341 if (Size < 8 && GPR_idx != NumGPRs) {
6342 // The easiest way to get this right-justified in a register
6343 // is to copy the structure into the rightmost portion of a
6344 // local variable slot, then load the whole slot into the
6345 // register.
6346 // FIXME: The memcpy seems to produce pretty awful code for
6347 // small aggregates, particularly for packed ones.
6348 // FIXME: It would be preferable to use the slot in the
6349 // parameter save area instead of a new local variable.
6350 SDValue AddPtr = PtrOff;
6351 if (!isLittleEndian) {
6352 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6353 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6354 }
6355 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6356 CallSeqStart,
6357 Flags, DAG, dl);
6358
6359 // Load the slot into the register.
6360 SDValue Load =
6361 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6362 MemOpChains.push_back(Load.getValue(1));
6363 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6364
6365 // Done with this argument.
6366 ArgOffset += PtrByteSize;
6367 continue;
6368 }
6369
6370 // For aggregates larger than PtrByteSize, copy the pieces of the
6371 // object that fit into registers from the parameter save area.
6372 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6373 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6374 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6375 if (GPR_idx != NumGPRs) {
6376 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6377 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6378 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6379 MachinePointerInfo(), ObjType);
6380
6381 MemOpChains.push_back(Load.getValue(1));
6382 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6383 ArgOffset += PtrByteSize;
6384 } else {
6385 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6386 break;
6387 }
6388 }
6389 continue;
6390 }
6391
6392 switch (Arg.getSimpleValueType().SimpleTy) {
6393 default: llvm_unreachable("Unexpected ValueType for argument!");
6394 case MVT::i1:
6395 case MVT::i32:
6396 case MVT::i64:
6397 if (Flags.isNest()) {
6398 // The 'nest' parameter, if any, is passed in R11.
6399 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6400 break;
6401 }
6402
6403 // These can be scalar arguments or elements of an integer array type
6404 // passed directly. Clang may use those instead of "byval" aggregate
6405 // types to avoid forcing arguments to memory unnecessarily.
6406 if (GPR_idx != NumGPRs) {
6407 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6408 } else {
6409 if (IsFastCall)
6410 ComputePtrOff();
6411
6413 "Parameter area must exist to pass an argument in memory.");
6414 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6415 true, CFlags.IsTailCall, false, MemOpChains,
6416 TailCallArguments, dl);
6417 if (IsFastCall)
6418 ArgOffset += PtrByteSize;
6419 }
6420 if (!IsFastCall)
6421 ArgOffset += PtrByteSize;
6422 break;
6423 case MVT::f32:
6424 case MVT::f64: {
6425 // These can be scalar arguments or elements of a float array type
6426 // passed directly. The latter are used to implement ELFv2 homogeneous
6427 // float aggregates.
6428
6429 // Named arguments go into FPRs first, and once they overflow, the
6430 // remaining arguments go into GPRs and then the parameter save area.
6431 // Unnamed arguments for vararg functions always go to GPRs and
6432 // then the parameter save area. For now, put all arguments to vararg
6433 // routines always in both locations (FPR *and* GPR or stack slot).
6434 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6435 bool NeededLoad = false;
6436
6437 // First load the argument into the next available FPR.
6438 if (FPR_idx != NumFPRs)
6439 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6440
6441 // Next, load the argument into GPR or stack slot if needed.
6442 if (!NeedGPROrStack)
6443 ;
6444 else if (GPR_idx != NumGPRs && !IsFastCall) {
6445 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6446 // once we support fp <-> gpr moves.
6447
6448 // In the non-vararg case, this can only ever happen in the
6449 // presence of f32 array types, since otherwise we never run
6450 // out of FPRs before running out of GPRs.
6451 SDValue ArgVal;
6452
6453 // Double values are always passed in a single GPR.
6454 if (Arg.getValueType() != MVT::f32) {
6455 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6456
6457 // Non-array float values are extended and passed in a GPR.
6458 } else if (!Flags.isInConsecutiveRegs()) {
6459 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6460 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6461
6462 // If we have an array of floats, we collect every odd element
6463 // together with its predecessor into one GPR.
6464 } else if (ArgOffset % PtrByteSize != 0) {
6465 SDValue Lo, Hi;
6466 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6467 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6468 if (!isLittleEndian)
6469 std::swap(Lo, Hi);
6470 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6471
6472 // The final element, if even, goes into the first half of a GPR.
6473 } else if (Flags.isInConsecutiveRegsLast()) {
6474 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6475 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6476 if (!isLittleEndian)
6477 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6478 DAG.getConstant(32, dl, MVT::i32));
6479
6480 // Non-final even elements are skipped; they will be handled
6481 // together with the subsequent argument on the next go-around.
6482 } else
6483 ArgVal = SDValue();
6484
6485 if (ArgVal.getNode())
6486 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6487 } else {
6488 if (IsFastCall)
6489 ComputePtrOff();
6490
6491 // Single-precision floating-point values are mapped to the
6492 // second (rightmost) word of the stack doubleword.
6493 if (Arg.getValueType() == MVT::f32 &&
6494 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6495 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6496 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6497 }
6498
6500 "Parameter area must exist to pass an argument in memory.");
6501 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6502 true, CFlags.IsTailCall, false, MemOpChains,
6503 TailCallArguments, dl);
6504
6505 NeededLoad = true;
6506 }
6507 // When passing an array of floats, the array occupies consecutive
6508 // space in the argument area; only round up to the next doubleword
6509 // at the end of the array. Otherwise, each float takes 8 bytes.
6510 if (!IsFastCall || NeededLoad) {
6511 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6512 Flags.isInConsecutiveRegs()) ? 4 : 8;
6513 if (Flags.isInConsecutiveRegsLast())
6514 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6515 }
6516 break;
6517 }
6518 case MVT::v4f32:
6519 case MVT::v4i32:
6520 case MVT::v8i16:
6521 case MVT::v16i8:
6522 case MVT::v2f64:
6523 case MVT::v2i64:
6524 case MVT::v1i128:
6525 case MVT::f128:
6526 // These can be scalar arguments or elements of a vector array type
6527 // passed directly. The latter are used to implement ELFv2 homogeneous
6528 // vector aggregates.
6529
6530 // For a varargs call, named arguments go into VRs or on the stack as
6531 // usual; unnamed arguments always go to the stack or the corresponding
6532 // GPRs when within range. For now, we always put the value in both
6533 // locations (or even all three).
6534 if (CFlags.IsVarArg) {
6536 "Parameter area must exist if we have a varargs call.");
6537 // We could elide this store in the case where the object fits
6538 // entirely in R registers. Maybe later.
6539 SDValue Store =
6540 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6541 MemOpChains.push_back(Store);
6542 if (VR_idx != NumVRs) {
6543 SDValue Load =
6544 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6545 MemOpChains.push_back(Load.getValue(1));
6546 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6547 }
6548 ArgOffset += 16;
6549 for (unsigned i=0; i<16; i+=PtrByteSize) {
6550 if (GPR_idx == NumGPRs)
6551 break;
6552 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6553 DAG.getConstant(i, dl, PtrVT));
6554 SDValue Load =
6555 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6556 MemOpChains.push_back(Load.getValue(1));
6557 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6558 }
6559 break;
6560 }
6561
6562 // Non-varargs Altivec params go into VRs or on the stack.
6563 if (VR_idx != NumVRs) {
6564 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6565 } else {
6566 if (IsFastCall)
6567 ComputePtrOff();
6568
6570 "Parameter area must exist to pass an argument in memory.");
6571 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6572 true, CFlags.IsTailCall, true, MemOpChains,
6573 TailCallArguments, dl);
6574 if (IsFastCall)
6575 ArgOffset += 16;
6576 }
6577
6578 if (!IsFastCall)
6579 ArgOffset += 16;
6580 break;
6581 }
6582 }
6583
6585 "mismatch in size of parameter area");
6587
6588 if (!MemOpChains.empty())
6589 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6590
6591 // Check if this is an indirect call (MTCTR/BCTRL).
6592 // See prepareDescriptorIndirectCall and buildCallOperands for more
6593 // information about calls through function pointers in the 64-bit SVR4 ABI.
6594 if (CFlags.IsIndirect) {
6595 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6596 // caller in the TOC save area.
6597 if (isTOCSaveRestoreRequired(Subtarget)) {
6598 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6599 // Load r2 into a virtual register and store it to the TOC save area.
6600 setUsesTOCBasePtr(DAG);
6601 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6602 // TOC save area offset.
6603 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6604 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6605 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6606 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6607 MachinePointerInfo::getStack(
6608 DAG.getMachineFunction(), TOCSaveOffset));
6609 }
6610 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6611 // This does not mean the MTCTR instruction must use R12; it's easier
6612 // to model this as an extra parameter, so do that.
6613 if (isELFv2ABI && !CFlags.IsPatchPoint)
6614 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6615 }
6616
6617 // Build a sequence of copy-to-reg nodes chained together with token chain
6618 // and flag operands which copy the outgoing args into the appropriate regs.
6619 SDValue InGlue;
6620 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6621 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6622 RegsToPass[i].second, InGlue);
6623 InGlue = Chain.getValue(1);
6624 }
6625
6626 if (CFlags.IsTailCall && !IsSibCall)
6627 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6628 TailCallArguments);
6629
6630 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6631 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6632}
6633
6634// Returns true when the shadow of a general purpose argument register
6635// in the parameter save area is aligned to at least 'RequiredAlign'.
6636 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6637 assert(RequiredAlign.value() <= 16 &&
6638 "Required alignment greater than stack alignment.");
6639 switch (Reg) {
6640 default:
6641 report_fatal_error("called on invalid register.");
6642 case PPC::R5:
6643 case PPC::R9:
6644 case PPC::X3:
6645 case PPC::X5:
6646 case PPC::X7:
6647 case PPC::X9:
6648 // These registers are 16 byte aligned which is the most strict alignment
6649 // we can support.
6650 return true;
6651 case PPC::R3:
6652 case PPC::R7:
6653 case PPC::X4:
6654 case PPC::X6:
6655 case PPC::X8:
6656 case PPC::X10:
6657 // The shadow of these registers in the PSA is 8 byte aligned.
6658 return RequiredAlign <= 8;
6659 case PPC::R4:
6660 case PPC::R6:
6661 case PPC::R8:
6662 case PPC::R10:
6663 return RequiredAlign <= 4;
6664 }
6665}
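// Editor's note (annotation): a worked example of the shadow alignments above,
// assuming the standard AIX linkage areas (24 bytes in 32-bit mode, 48 bytes
// in 64-bit mode). X3 shadows PSA offset 48, X4 offset 56, X5 offset 64, and
// so on; with a 16-byte aligned stack, offsets 48, 64, 80, and 96 are 16-byte
// aligned (X3/X5/X7/X9) while 56, 72, ... are only 8-byte aligned
// (X4/X6/X8/X10). In 32-bit mode R3 sits at offset 24 (8-byte aligned), R4 at
// 28 (4-byte aligned), and R5 at 32 (16-byte aligned), matching the cases in
// the switch.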
6666
6667static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6668 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6669 CCState &S) {
6670 AIXCCState &State = static_cast<AIXCCState &>(S);
6671 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6672 State.getMachineFunction().getSubtarget());
6673 const bool IsPPC64 = Subtarget.isPPC64();
6674 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6675 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6676
6677 if (ValVT == MVT::f128)
6678 report_fatal_error("f128 is unimplemented on AIX.");
6679
6680 if (ArgFlags.isNest())
6681 report_fatal_error("Nest arguments are unimplemented.");
6682
6683 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6684 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6685 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6686 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6687 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6688 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6689
6690 static const MCPhysReg VR[] = {// Vector registers.
6691 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6692 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6693 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6694
6695 if (ArgFlags.isByVal()) {
6696 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6697 report_fatal_error("Pass-by-value arguments with alignment greater than "
6698 "register width are not supported.");
6699
6700 const unsigned ByValSize = ArgFlags.getByValSize();
6701
6702 // An empty aggregate parameter takes up no storage and no registers,
6703 // but needs a MemLoc for a stack slot for the formal arguments side.
6704 if (ByValSize == 0) {
6705 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6706 State.getStackSize(), RegVT, LocInfo));
6707 return false;
6708 }
6709
6710 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6711 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6712 for (const unsigned E = Offset + StackSize; Offset < E;
6713 Offset += PtrAlign.value()) {
6714 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6715 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6716 else {
6717 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6718 Offset, RegVT,
6719 LocInfo));
6720 break;
6721 }
6722 }
6723 return false;
6724 }
6725
6726 // Arguments always reserve parameter save area.
6727 switch (ValVT.SimpleTy) {
6728 default:
6729 report_fatal_error("Unhandled value type for argument.");
6730 case MVT::i64:
6731 // i64 arguments should have been split to i32 for PPC32.
6732 assert(IsPPC64 && "PPC32 should have split i64 values.");
6733 [[fallthrough]];
6734 case MVT::i1:
6735 case MVT::i32: {
6736 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6737 // AIX integer arguments are always passed in register width.
6738 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6739 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6740 : CCValAssign::LocInfo::ZExt;
6741 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6742 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6743 else
6744 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6745
6746 return false;
6747 }
6748 case MVT::f32:
6749 case MVT::f64: {
6750 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6751 const unsigned StoreSize = LocVT.getStoreSize();
6752 // Floats are always 4-byte aligned in the PSA on AIX.
6753 // This includes f64 in 64-bit mode for ABI compatibility.
6754 const unsigned Offset =
6755 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6756 unsigned FReg = State.AllocateReg(FPR);
6757 if (FReg)
6758 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6759
6760 // Reserve and initialize GPRs or initialize the PSA as required.
6761 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6762 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6763 assert(FReg && "An FPR should be available when a GPR is reserved.");
6764 if (State.isVarArg()) {
6765 // Successfully reserved GPRs are only initialized for vararg calls.
6766 // Custom handling is required for:
6767 // f64 in PPC32 needs to be split into 2 GPRs.
6768 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6769 State.addLoc(
6770 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6771 }
6772 } else {
6773 // If there are insufficient GPRs, the PSA needs to be initialized.
6774 // Initialization occurs even if an FPR was initialized for
6775 // compatibility with the AIX XL compiler. The full memory for the
6776 // argument will be initialized even if a prior word is saved in GPR.
6777 // A custom memLoc is used when the argument also passes in FPR so
6778 // that the callee handling can skip over it easily.
6779 State.addLoc(
6780 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6781 LocInfo)
6782 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6783 break;
6784 }
6785 }
6786
6787 return false;
6788 }
6789 case MVT::v4f32:
6790 case MVT::v4i32:
6791 case MVT::v8i16:
6792 case MVT::v16i8:
6793 case MVT::v2i64:
6794 case MVT::v2f64:
6795 case MVT::v1i128: {
6796 const unsigned VecSize = 16;
6797 const Align VecAlign(VecSize);
6798
6799 if (!State.isVarArg()) {
6800 // If there are vector registers remaining we don't consume any stack
6801 // space.
6802 if (unsigned VReg = State.AllocateReg(VR)) {
6803 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6804 return false;
6805 }
6806 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6807 // might be allocated in the portion of the PSA that is shadowed by the
6808 // GPRs.
6809 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6810 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6811 return false;
6812 }
6813
6814 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6815 ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6816
6817 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6818 // Burn any underaligned registers and their shadowed stack space until
6819 // we reach the required alignment.
6820 while (NextRegIndex != GPRs.size() &&
6821 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6822 // Shadow allocate register and its stack shadow.
6823 unsigned Reg = State.AllocateReg(GPRs);
6824 State.AllocateStack(PtrSize, PtrAlign);
6825 assert(Reg && "Allocating register unexpectedly failed.");
6826 (void)Reg;
6827 NextRegIndex = State.getFirstUnallocated(GPRs);
6828 }
6829
6830 // Vectors that are passed as fixed arguments are handled differently.
6831 // They are passed in VRs if any are available (unlike arguments passed
6832 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6833 // functions)
6834 if (State.isFixed(ValNo)) {
6835 if (unsigned VReg = State.AllocateReg(VR)) {
6836 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6837 // Shadow allocate GPRs and stack space even though we pass in a VR.
6838 for (unsigned I = 0; I != VecSize; I += PtrSize)
6839 State.AllocateReg(GPRs);
6840 State.AllocateStack(VecSize, VecAlign);
6841 return false;
6842 }
6843 // No vector registers remain so pass on the stack.
6844 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6845 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6846 return false;
6847 }
6848
6849 // If all GPRS are consumed then we pass the argument fully on the stack.
6850 if (NextRegIndex == GPRs.size()) {
6851 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6852 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6853 return false;
6854 }
6855
6856 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6857 // half of the argument, and then need to pass the remaining half on the
6858 // stack.
6859 if (GPRs[NextRegIndex] == PPC::R9) {
6860 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6861 State.addLoc(
6862 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6863
6864 const unsigned FirstReg = State.AllocateReg(PPC::R9);
6865 const unsigned SecondReg = State.AllocateReg(PPC::R10);
6867 "Allocating R9 or R10 unexpectedly failed.");
6868 State.addLoc(
6869 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6870 State.addLoc(
6871 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6872 return false;
6873 }
6874
6875 // We have enough GPRs to fully pass the vector argument, and we have
6876 // already consumed any underaligned registers. Start with the custom
6877 // MemLoc and then the custom RegLocs.
6878 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6879 State.addLoc(
6880 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6881 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6882 const unsigned Reg = State.AllocateReg(GPRs);
6883 assert(Reg && "Failed to allocate register for vararg vector argument");
6884 State.addLoc(
6885 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6886 }
6887 return false;
6888 }
6889 }
6890 return true;
6891}
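// Editor's note (annotation): to illustrate the vararg vector path above,
// consider a 16-byte vector passed through "..." on 32-bit AIX when R5 is the
// next free GPR. R5's shadow in the PSA is 16-byte aligned, so nothing is
// burned; CC_AIX then records one custom MemLoc for the PSA slot plus four
// custom RegLocs (R5, R6, R7, R8), so the value travels both in memory and in
// GPRs. If only R9/R10 remain, the R9 corner case passes half of the vector
// in those two registers and the rest purely on the stack.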
6892
6893// So far, this function is only used by LowerFormalArguments_AIX()
6894 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6895 bool IsPPC64,
6896 bool HasP8Vector,
6897 bool HasVSX) {
6898 assert((IsPPC64 || SVT != MVT::i64) &&
6899 "i64 should have been split for 32-bit codegen.");
6900
6901 switch (SVT) {
6902 default:
6903 report_fatal_error("Unexpected value type for formal argument");
6904 case MVT::i1:
6905 case MVT::i32:
6906 case MVT::i64:
6907 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6908 case MVT::f32:
6909 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6910 case MVT::f64:
6911 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6912 case MVT::v4f32:
6913 case MVT::v4i32:
6914 case MVT::v8i16:
6915 case MVT::v16i8:
6916 case MVT::v2i64:
6917 case MVT::v2f64:
6918 case MVT::v1i128:
6919 return &PPC::VRRCRegClass;
6920 }
6921}
6922
6923 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6924 SelectionDAG &DAG, SDValue ArgValue,
6925 MVT LocVT, const SDLoc &dl) {
6926 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6927 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6928
6929 if (Flags.isSExt())
6930 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6931 DAG.getValueType(ValVT));
6932 else if (Flags.isZExt())
6933 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6934 DAG.getValueType(ValVT));
6935
6936 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6937}
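// Editor's note (annotation): for an i1 value that arrives in a 64-bit GPR
// with the zeroext attribute, the helper above produces, schematically:
//
//   t1: i64 = AssertZext t0, ValueType:i1
//   t2: i1  = truncate t1
//
// (schematic SelectionDAG notation, not exact -debug dump output).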
6938
6939static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6940 const unsigned LASize = FL->getLinkageSize();
6941
6942 if (PPC::GPRCRegClass.contains(Reg)) {
6943 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6944 "Reg must be a valid argument register!");
6945 return LASize + 4 * (Reg - PPC::R3);
6946 }
6947
6948 if (PPC::G8RCRegClass.contains(Reg)) {
6949 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6950 "Reg must be a valid argument register!");
6951 return LASize + 8 * (Reg - PPC::X3);
6952 }
6953
6954 llvm_unreachable("Only general purpose registers expected.");
6955}
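// Editor's note (annotation): a quick sanity check of the mapping above,
// assuming the standard linkage-area sizes: in 32-bit mode (24-byte linkage
// area) R3 -> 24 + 4*0 = 24, R4 -> 28, ..., R10 -> 52; in 64-bit mode
// (48-byte linkage area) X3 -> 48, X4 -> 56, ..., X10 -> 104.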
6956
6957// AIX ABI Stack Frame Layout:
6958//
6959// Low Memory +--------------------------------------------+
6960// SP +---> | Back chain | ---+
6961// | +--------------------------------------------+ |
6962// | | Saved Condition Register | |
6963// | +--------------------------------------------+ |
6964// | | Saved Linkage Register | |
6965// | +--------------------------------------------+ | Linkage Area
6966// | | Reserved for compilers | |
6967// | +--------------------------------------------+ |
6968// | | Reserved for binders | |
6969// | +--------------------------------------------+ |
6970// | | Saved TOC pointer | ---+
6971// | +--------------------------------------------+
6972// | | Parameter save area |
6973// | +--------------------------------------------+
6974// | | Alloca space |
6975// | +--------------------------------------------+
6976// | | Local variable space |
6977// | +--------------------------------------------+
6978// | | Float/int conversion temporary |
6979// | +--------------------------------------------+
6980// | | Save area for AltiVec registers |
6981// | +--------------------------------------------+
6982// | | AltiVec alignment padding |
6983// | +--------------------------------------------+
6984// | | Save area for VRSAVE register |
6985// | +--------------------------------------------+
6986// | | Save area for General Purpose registers |
6987// | +--------------------------------------------+
6988// | | Save area for Floating Point registers |
6989// | +--------------------------------------------+
6990// +---- | Back chain |
6991// High Memory +--------------------------------------------+
6992//
6993// Specifications:
6994// AIX 7.2 Assembler Language Reference
6995// Subroutine linkage convention
6996
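// Editor's note (annotation): relating the diagram above to the lowering that
// follows, a callee such as
//
//   void callee(int a, double b, vector int c);  // AltiVec 'vector' extension
//
// receives 'a' in R3/X3 (shadowing the first parameter save area word), 'b' in
// FPR1 (with its PSA words shadowed in GPRs), and 'c' in V2; only when
// registers run out, or for varargs, does LowerFormalArguments_AIX load the
// corresponding PSA slots relative to the incoming stack pointer.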
6997SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6998 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6999 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7000 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7001
7002 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7003 CallConv == CallingConv::Fast) &&
7004 "Unexpected calling convention!");
7005
7006 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7007 report_fatal_error("Tail call support is unimplemented on AIX.");
7008
7009 if (useSoftFloat())
7010 report_fatal_error("Soft float support is unimplemented on AIX.");
7011
7012 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7013
7014 const bool IsPPC64 = Subtarget.isPPC64();
7015 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7016
7017 // Assign locations to all of the incoming arguments.
7018 SmallVector<CCValAssign, 16> ArgLocs;
7019 MachineFunction &MF = DAG.getMachineFunction();
7020 MachineFrameInfo &MFI = MF.getFrameInfo();
7021 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7022 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7023
7024 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7025 // Reserve space for the linkage area on the stack.
7026 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7027 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7028 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7029
7030 SmallVector<SDValue, 8> MemOps;
7031
7032 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7033 CCValAssign &VA = ArgLocs[I++];
7034 MVT LocVT = VA.getLocVT();
7035 MVT ValVT = VA.getValVT();
7036 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7037 // For compatibility with the AIX XL compiler, the float args in the
7038 // parameter save area are initialized even if the argument is available
7039 // in register. The caller is required to initialize both the register
7040 // and memory; however, the callee can choose to expect it in either.
7041 // The memloc is dismissed here because the argument is retrieved from
7042 // the register.
7043 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7044 continue;
7045
7046 auto HandleMemLoc = [&]() {
7047 const unsigned LocSize = LocVT.getStoreSize();
7048 const unsigned ValSize = ValVT.getStoreSize();
7049 assert((ValSize <= LocSize) &&
7050 "Object size is larger than size of MemLoc");
7051 int CurArgOffset = VA.getLocMemOffset();
7052 // Objects are right-justified because AIX is big-endian.
7053 if (LocSize > ValSize)
7054 CurArgOffset += LocSize - ValSize;
7055 // Potential tail calls could cause overwriting of argument stack slots.
7056 const bool IsImmutable =
7057 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7058 (CallConv == CallingConv::Fast));
7059 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7060 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7061 SDValue ArgValue =
7062 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7063 InVals.push_back(ArgValue);
7064 };
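// For example (illustrative, assuming CC_AIX promotes i32 to a full slot on
// 64-bit): an i32 argument landing in a MemLoc occupies an 8-byte slot, so
// LocSize - ValSize = 4 and the load above reads from LocMemOffset + 4,
// picking up the right-justified bytes of the big-endian slot.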
7065
7066 // Vector arguments to VaArg functions are passed both on the stack, and
7067 // in any available GPRs. Load the value from the stack and add the GPRs
7068 // as live ins.
7069 if (VA.isMemLoc() && VA.needsCustom()) {
7070 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7071 assert(isVarArg && "Only use custom memloc for vararg.");
7072 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7073 // matching custom RegLocs.
7074 const unsigned OriginalValNo = VA.getValNo();
7075 (void)OriginalValNo;
7076
7077 auto HandleCustomVecRegLoc = [&]() {
7078 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7079 "Missing custom RegLoc.");
7080 VA = ArgLocs[I++];
7081 assert(VA.getValVT().isVector() &&
7082 "Unexpected Val type for custom RegLoc.");
7083 assert(VA.getValNo() == OriginalValNo &&
7084 "ValNo mismatch between custom MemLoc and RegLoc.");
7085 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7086 MF.addLiveIn(VA.getLocReg(),
7087 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7088 Subtarget.hasVSX()));
7089 };
7090
7091 HandleMemLoc();
7092 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7093 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7094 // R10.
7095 HandleCustomVecRegLoc();
7096 HandleCustomVecRegLoc();
7097
7098 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7099 // we passed the vector in R5, R6, R7 and R8.
7100 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7101 assert(!IsPPC64 &&
7102 "Only 2 custom RegLocs expected for 64-bit codegen.");
7103 HandleCustomVecRegLoc();
7104 HandleCustomVecRegLoc();
7105 }
7106
7107 continue;
7108 }
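// Illustrative example: a <4 x i32> vararg on 64-bit AIX is assigned one
// custom MemLoc (its 16-byte parameter save area slot) plus two custom
// RegLocs for the GPRs that shadow that slot, so a single incoming vector
// argument consumes three CCValAssigns here.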
7109
7110 if (VA.isRegLoc()) {
7111 if (VA.getValVT().isScalarInteger())
7112 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7113 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7114 switch (VA.getValVT().SimpleTy) {
7115 default:
7116 report_fatal_error("Unhandled value type for argument.");
7117 case MVT::f32:
7118 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7119 break;
7120 case MVT::f64:
7121 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7122 break;
7123 }
7124 } else if (VA.getValVT().isVector()) {
7125 switch (VA.getValVT().SimpleTy) {
7126 default:
7127 report_fatal_error("Unhandled value type for argument.");
7128 case MVT::v16i8:
7129 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7130 break;
7131 case MVT::v8i16:
7132 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7133 break;
7134 case MVT::v4i32:
7135 case MVT::v2i64:
7136 case MVT::v1i128:
7137 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7138 break;
7139 case MVT::v4f32:
7140 case MVT::v2f64:
7141 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7142 break;
7143 }
7144 }
7145 }
7146
7147 if (Flags.isByVal() && VA.isMemLoc()) {
7148 const unsigned Size =
7149 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7150 PtrByteSize);
7151 const int FI = MF.getFrameInfo().CreateFixedObject(
7152 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7153 /* IsAliased */ true);
7154 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7155 InVals.push_back(FIN);
7156
7157 continue;
7158 }
7159
7160 if (Flags.isByVal()) {
7161 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7162
7163 const MCPhysReg ArgReg = VA.getLocReg();
7164 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7165
7166 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7167 report_fatal_error("Over aligned byvals not supported yet.");
7168
7169 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7170 const int FI = MF.getFrameInfo().CreateFixedObject(
7171 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7172 /* IsAliased */ true);
7173 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7174 InVals.push_back(FIN);
7175
7176 // Add live ins for all the RegLocs for the same ByVal.
7177 const TargetRegisterClass *RegClass =
7178 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7179
7180 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7181 unsigned Offset) {
7182 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7183 // Since the caller's side has left-justified the aggregate in the
7184 // register, we can simply store the entire register into the stack
7185 // slot.
7186 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7187 // The store to the fixedstack object is needed because accessing a
7188 // field of the ByVal will use a gep and load. Ideally we will optimize
7189 // to extracting the value from the register directly, and elide the
7190 // stores when the argument's address is not taken, but that will need to
7191 // be future work.
7192 SDValue Store = DAG.getStore(
7193 CopyFrom.getValue(1), dl, CopyFrom,
7194 DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7195 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7196
7197 MemOps.push_back(Store);
7198 };
7199
7200 unsigned Offset = 0;
7201 HandleRegLoc(VA.getLocReg(), Offset);
7202 Offset += PtrByteSize;
7203 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7204 Offset += PtrByteSize) {
7205 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7206 "RegLocs should be for ByVal argument.");
7207
7208 const CCValAssign RL = ArgLocs[I++];
7209 HandleRegLoc(RL.getLocReg(), Offset);
7210 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7211 }
7212
7213 if (Offset != StackSize) {
7214 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7215 "Expected MemLoc for remaining bytes.");
7216 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7217 // Consume the MemLoc. The InVal has already been emitted, so nothing
7218 // more needs to be done.
7219 ++I;
7220 }
7221
7222 continue;
7223 }
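// Illustrative example: a 12-byte by-value struct on 64-bit AIX has
// StackSize = 16, so (assuming GPRs are still available) it arrives in two
// GPRs; both registers are stored back to the fixed stack object above so
// that field accesses through the frame index see a contiguous in-memory
// copy of the aggregate.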
7224
7225 if (VA.isRegLoc() && !VA.needsCustom()) {
7226 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7227 Register VReg =
7228 MF.addLiveIn(VA.getLocReg(),
7229 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7230 Subtarget.hasVSX()));
7231 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7232 if (ValVT.isScalarInteger() &&
7233 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7234 ArgValue =
7235 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7236 }
7237 InVals.push_back(ArgValue);
7238 continue;
7239 }
7240 if (VA.isMemLoc()) {
7241 HandleMemLoc();
7242 continue;
7243 }
7244 }
7245
7246 // On AIX a minimum of 8 words is saved to the parameter save area.
7247 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7248 // Area that is at least reserved in the caller of this function.
7249 unsigned CallerReservedArea = std::max<unsigned>(
7250 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7251
7252 // Set the size that is at least reserved in caller of this function. Tail
7253 // call optimized function's reserved stack space needs to be aligned so
7254 // that taking the difference between two stack areas will result in an
7255 // aligned stack.
7256 CallerReservedArea =
7257 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7258 FuncInfo->setMinReservedArea(CallerReservedArea);
7259
7260 if (isVarArg) {
7261 FuncInfo->setVarArgsFrameIndex(
7262 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7263 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7264
7265 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7266 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7267
7268 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7269 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7270 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7271
7272 // The fixed integer arguments of a variadic function are stored to the
7273 // VarArgsFrameIndex on the stack so that they may be loaded by
7274 // dereferencing the result of va_next.
7275 for (unsigned GPRIndex =
7276 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7277 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7278
7279 const Register VReg =
7280 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7281 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7282
7283 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7284 SDValue Store =
7285 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7286 MemOps.push_back(Store);
7287 // Increment the address for the next argument to store.
7288 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7289 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7290 }
7291 }
7292
7293 if (!MemOps.empty())
7294 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7295
7296 return Chain;
7297}
7298
7299SDValue PPCTargetLowering::LowerCall_AIX(
7300 SDValue Chain, SDValue Callee, CallFlags CFlags,
7301 const SmallVectorImpl<ISD::OutputArg> &Outs,
7302 const SmallVectorImpl<SDValue> &OutVals,
7303 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7304 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7305 const CallBase *CB) const {
7306 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7307 // AIX ABI stack frame layout.
7308
7309 assert((CFlags.CallConv == CallingConv::C ||
7310 CFlags.CallConv == CallingConv::Cold ||
7311 CFlags.CallConv == CallingConv::Fast) &&
7312 "Unexpected calling convention!");
7313
7314 if (CFlags.IsPatchPoint)
7315 report_fatal_error("This call type is unimplemented on AIX.");
7316
7317 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7318
7319 MachineFunction &MF = DAG.getMachineFunction();
7320 SmallVector<CCValAssign, 16> ArgLocs;
7321 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7322 *DAG.getContext());
7323
7324 // Reserve space for the linkage save area (LSA) on the stack.
7325 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7326 // [SP][CR][LR][2 x reserved][TOC].
7327 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7328 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7329 const bool IsPPC64 = Subtarget.isPPC64();
7330 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7331 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7332 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7333 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7334
7335 // The prolog code of the callee may store up to 8 GPR argument registers to
7336 // the stack, allowing va_start to index over them in memory if the callee
7337 // is variadic.
7338 // Because we cannot tell if this is needed on the caller side, we have to
7339 // conservatively assume that it is needed. As such, make sure we have at
7340 // least enough stack space for the caller to store the 8 GPRs.
7341 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7342 const unsigned NumBytes = std::max<unsigned>(
7343 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7344
7345 // Adjust the stack pointer for the new arguments...
7346 // These operations are automatically eliminated by the prolog/epilog pass.
7347 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7348 SDValue CallSeqStart = Chain;
7349
7350 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7351 SmallVector<SDValue, 8> MemOpChains;
7352
7353 // Set up a copy of the stack pointer for loading and storing any
7354 // arguments that may not fit in the registers available for argument
7355 // passing.
7356 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7357 : DAG.getRegister(PPC::R1, MVT::i32);
7358
7359 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7360 const unsigned ValNo = ArgLocs[I].getValNo();
7361 SDValue Arg = OutVals[ValNo];
7362 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7363
7364 if (Flags.isByVal()) {
7365 const unsigned ByValSize = Flags.getByValSize();
7366
7367 // Nothing to do for zero-sized ByVals on the caller side.
7368 if (!ByValSize) {
7369 ++I;
7370 continue;
7371 }
7372
7373 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7374 return DAG.getExtLoad(
7375 ISD::ZEXTLOAD, dl, PtrVT, Chain,
7376 (LoadOffset != 0)
7377 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7378 : Arg,
7379 MachinePointerInfo(), VT);
7380 };
7381
7382 unsigned LoadOffset = 0;
7383
7384 // Initialize registers, which are fully occupied by the by-val argument.
7385 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7386 SDValue Load = GetLoad(PtrVT, LoadOffset);
7387 MemOpChains.push_back(Load.getValue(1));
7388 LoadOffset += PtrByteSize;
7389 const CCValAssign &ByValVA = ArgLocs[I++];
7390 assert(ByValVA.getValNo() == ValNo &&
7391 "Unexpected location for pass-by-value argument.");
7392 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7393 }
7394
7395 if (LoadOffset == ByValSize)
7396 continue;
7397
7398 // There must be one more loc to handle the remainder.
7399 assert(ArgLocs[I].getValNo() == ValNo &&
7400 "Expected additional location for by-value argument.");
7401
7402 if (ArgLocs[I].isMemLoc()) {
7403 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7404 const CCValAssign &ByValVA = ArgLocs[I++];
7405 ISD::ArgFlagsTy MemcpyFlags = Flags;
7406 // Only memcpy the bytes that are not passed in registers.
7407 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7408 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7409 (LoadOffset != 0)
7410 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7411 : Arg,
7412 DAG.getObjectPtrOffset(dl, StackPtr,
7413 TypeSize::Fixed(ByValVA.getLocMemOffset())),
7414 CallSeqStart, MemcpyFlags, DAG, dl);
7415 continue;
7416 }
7417
7418 // Initialize the final register residue.
7419 // Any residue that occupies the final by-val arg register must be
7420 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7421 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7422 // 2 and 1 byte loads.
7423 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7425 "Unexpected register residue for by-value argument.");
7427 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7428 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7429 const MVT VT =
7430 N == 1 ? MVT::i8
7431 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7432 SDValue Load = GetLoad(VT, LoadOffset);
7433 MemOpChains.push_back(Load.getValue(1));
7434 LoadOffset += N;
7435 Bytes += N;
7436
7437 // By-val arguments are passed left-justified in register.
7438 // Every load here needs to be shifted, otherwise a full register load
7439 // should have been used.
7440 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7441 "Unexpected load emitted during handling of pass-by-value "
7442 "argument.");
7443 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7444 EVT ShiftAmountTy =
7445 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7446 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7447 SDValue ShiftedLoad =
7448 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7449 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, Load.getValueType(),
7450 ResidueVal, ShiftedLoad)
7451 : ShiftedLoad;
7452 }
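// Worked example for the residue loop above (illustrative): for a 7-byte
// by-val argument on 64-bit, ResidueBytes = 7, so it is read with 4-, 2-
// and 1-byte zero-extending loads at offsets 0, 4 and 6. After each load
// Bytes is 4, 6 and 7, giving shift amounts of 32, 16 and 8 bits, and the
// OR of the three shifted values leaves the 7 bytes left-justified in bits
// 63..8 of the register.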
7453
7454 const CCValAssign &ByValVA = ArgLocs[I++];
7455 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7456 continue;
7457 }
7458
7459 CCValAssign &VA = ArgLocs[I++];
7460 const MVT LocVT = VA.getLocVT();
7461 const MVT ValVT = VA.getValVT();
7462
7463 switch (VA.getLocInfo()) {
7464 default:
7465 report_fatal_error("Unexpected argument extension type.");
7466 case CCValAssign::Full:
7467 break;
7468 case CCValAssign::ZExt:
7469 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7470 break;
7471 case CCValAssign::SExt:
7472 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7473 break;
7474 }
7475
7476 if (VA.isRegLoc() && !VA.needsCustom()) {
7477 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7478 continue;
7479 }
7480
7481 // Vector arguments passed to VarArg functions need custom handling when
7482 // they are passed (at least partially) in GPRs.
7483 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7484 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7485 // Store value to its stack slot.
7486 SDValue PtrOff =
7487 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7488 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7489 SDValue Store =
7490 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7491 MemOpChains.push_back(Store);
7492 const unsigned OriginalValNo = VA.getValNo();
7493 // Then load the GPRs from the stack
7494 unsigned LoadOffset = 0;
7495 auto HandleCustomVecRegLoc = [&]() {
7496 assert(I != E && "Unexpected end of CCvalAssigns.");
7497 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7498 "Expected custom RegLoc.");
7499 CCValAssign RegVA = ArgLocs[I++];
7500 assert(RegVA.getValNo() == OriginalValNo &&
7501 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7502 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7503 DAG.getConstant(LoadOffset, dl, PtrVT));
7504 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7505 MemOpChains.push_back(Load.getValue(1));
7506 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7507 LoadOffset += PtrByteSize;
7508 };
7509
7510 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7511 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7512 // R10.
7513 HandleCustomVecRegLoc();
7514 HandleCustomVecRegLoc();
7515
7516 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7517 ArgLocs[I].getValNo() == OriginalValNo) {
7518 assert(!IsPPC64 &&
7519 "Only 2 custom RegLocs expected for 64-bit codegen.");
7520 HandleCustomVecRegLoc();
7521 HandleCustomVecRegLoc();
7522 }
7523
7524 continue;
7525 }
7526
7527 if (VA.isMemLoc()) {
7528 SDValue PtrOff =
7529 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7530 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7531 MemOpChains.push_back(
7532 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7533
7534 continue;
7535 }
7536
7537 if (!ValVT.isFloatingPoint())
7539 "Unexpected register handling for calling convention.");
7540
7541 // Custom handling is used for GPR initializations for vararg float
7542 // arguments.
7543 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7544 LocVT.isInteger() &&
7545 "Custom register handling only expected for VarArg.");
7546
7547 SDValue ArgAsInt =
7548 DAG.getBitcast(MVT::getIntegerVT(LocVT.getSizeInBits()), Arg);
7549
7550 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7551 // f32 in 32-bit GPR
7552 // f64 in 64-bit GPR
7553 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7554 else if (Arg.getValueType().getFixedSizeInBits() <
7555 LocVT.getFixedSizeInBits())
7556 // f32 in 64-bit GPR.
7557 RegsToPass.push_back(std::make_pair(
7558 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7559 else {
7560 // f64 in two 32-bit GPRs
7561 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7562 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7563 "Unexpected custom register for argument!");
7564 CCValAssign &GPR1 = VA;
7565 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7566 DAG.getConstant(32, dl, MVT::i8));
7567 RegsToPass.push_back(std::make_pair(
7568 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7569
7570 if (I != E) {
7571 // If only 1 GPR was available, there will only be one custom GPR and
7572 // the argument will also be passed in memory.
7573 CCValAssign &PeekArg = ArgLocs[I];
7574 if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7575 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7576 CCValAssign &GPR2 = ArgLocs[I++];
7577 RegsToPass.push_back(std::make_pair(
7578 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7579 }
7580 }
7581 }
7582 }
7583
7584 if (!MemOpChains.empty())
7585 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7586
7587 // For indirect calls, we need to save the TOC base to the stack for
7588 // restoration after the call.
7589 if (CFlags.IsIndirect) {
7590 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7591 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7592 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7593 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7594 const unsigned TOCSaveOffset =
7595 Subtarget.getFrameLowering()->getTOCSaveOffset();
7596
7597 setUsesTOCBasePtr(DAG);
7598 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7599 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7600 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7601 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7602 Chain = DAG.getStore(
7603 Val.getValue(1), dl, Val, AddPtr,
7604 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7605 }
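// Illustrative note: getTOCSaveOffset() places this slot inside the linkage
// area (typically SP + 40 on 64-bit AIX and SP + 20 on 32-bit), which is
// where the AIX ABI expects the caller's TOC pointer to be preserved across
// an indirect call.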
7606
7607 // Build a sequence of copy-to-reg nodes chained together with token chain
7608 // and flag operands which copy the outgoing args into the appropriate regs.
7609 SDValue InGlue;
7610 for (auto Reg : RegsToPass) {
7611 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7612 InGlue = Chain.getValue(1);
7613 }
7614
7615 const int SPDiff = 0;
7616 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7617 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7618}
7619
7620bool
7621PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7622 MachineFunction &MF, bool isVarArg,
7623 const SmallVectorImpl<ISD::OutputArg> &Outs,
7624 LLVMContext &Context) const {
7625 SmallVector<CCValAssign, 16> RVLocs;
7626 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7627 return CCInfo.CheckReturn(
7628 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7629 ? RetCC_PPC_Cold
7630 : RetCC_PPC);
7631}
7632
7633SDValue
7634PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7635 bool isVarArg,
7636 const SmallVectorImpl<ISD::OutputArg> &Outs,
7637 const SmallVectorImpl<SDValue> &OutVals,
7638 const SDLoc &dl, SelectionDAG &DAG) const {
7639 SmallVector<CCValAssign, 16> RVLocs;
7640 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7641 *DAG.getContext());
7642 CCInfo.AnalyzeReturn(Outs,
7643 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7644 ? RetCC_PPC_Cold
7645 : RetCC_PPC);
7646
7647 SDValue Glue;
7648 SmallVector<SDValue, 4> RetOps(1, Chain);
7649
7650 // Copy the result values into the output registers.
7651 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7652 CCValAssign &VA = RVLocs[i];
7653 assert(VA.isRegLoc() && "Can only return in registers!");
7654
7655 SDValue Arg = OutVals[RealResIdx];
7656
7657 switch (VA.getLocInfo()) {
7658 default: llvm_unreachable("Unknown loc info!");
7659 case CCValAssign::Full: break;
7660 case CCValAssign::AExt:
7661 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7662 break;
7663 case CCValAssign::ZExt:
7664 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7665 break;
7666 case CCValAssign::SExt:
7667 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7668 break;
7669 }
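// With SPE (illustrative): an f64 return value lives in a single 64-bit SPE
// GPR but is returned in two i32 locations, so EXTRACT_SPE below splits it
// into word halves whose register order depends on endianness.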
7670 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7671 bool isLittleEndian = Subtarget.isLittleEndian();
7672 // Legalize ret f64 -> ret 2 x i32.
7673 SDValue SVal =
7674 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7675 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7676 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7677 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7678 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7679 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7680 Glue = Chain.getValue(1);
7681 VA = RVLocs[++i]; // skip ahead to next loc
7682 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7683 } else
7684 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7685 Glue = Chain.getValue(1);
7686 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7687 }
7688
7689 RetOps[0] = Chain; // Update chain.
7690
7691 // Add the glue if we have it.
7692 if (Glue.getNode())
7693 RetOps.push_back(Glue);
7694
7695 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7696}
7697
7698SDValue
7699PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7700 SelectionDAG &DAG) const {
7701 SDLoc dl(Op);
7702
7703 // Get the correct type for integers.
7704 EVT IntVT = Op.getValueType();
7705
7706 // Get the inputs.
7707 SDValue Chain = Op.getOperand(0);
7708 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7709 // Build a DYNAREAOFFSET node.
7710 SDValue Ops[2] = {Chain, FPSIdx};
7711 SDVTList VTs = DAG.getVTList(IntVT);
7712 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7713}
7714
7715SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7716 SelectionDAG &DAG) const {
7717 // When we pop the dynamic allocation we need to restore the SP link.
7718 SDLoc dl(Op);
7719
7720 // Get the correct type for pointers.
7721 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7722
7723 // Construct the stack pointer operand.
7724 bool isPPC64 = Subtarget.isPPC64();
7725 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7726 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7727
7728 // Get the operands for the STACKRESTORE.
7729 SDValue Chain = Op.getOperand(0);
7730 SDValue SaveSP = Op.getOperand(1);
7731
7732 // Load the old link SP.
7733 SDValue LoadLinkSP =
7734 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7735
7736 // Restore the stack pointer.
7737 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7738
7739 // Store the old link SP.
7740 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7741}
7742
7743SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7744 MachineFunction &MF = DAG.getMachineFunction();
7745 bool isPPC64 = Subtarget.isPPC64();
7746 EVT PtrVT = getPointerTy(MF.getDataLayout());
7747
7748 // Get the current return address save index. The users of this index
7749 // will be primarily DYNALLOC instructions.
7750 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7751 int RASI = FI->getReturnAddrSaveIndex();
7752
7753 // If the return address save index hasn't been defined yet.
7754 if (!RASI) {
7755 // Find out the fixed offset of the return address save area.
7756 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7757 // Allocate the frame index for the return address save area.
7758 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7759 // Save the result.
7760 FI->setReturnAddrSaveIndex(RASI);
7761 }
7762 return DAG.getFrameIndex(RASI, PtrVT);
7763}
7764
7765SDValue
7766PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7767 MachineFunction &MF = DAG.getMachineFunction();
7768 bool isPPC64 = Subtarget.isPPC64();
7769 EVT PtrVT = getPointerTy(MF.getDataLayout());
7770
7771 // Get current frame pointer save index. The users of this index will be
7772 // primarily DYNALLOC instructions.
7773 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7774 int FPSI = FI->getFramePointerSaveIndex();
7775
7776 // If the frame pointer save index hasn't been defined yet.
7777 if (!FPSI) {
7778 // Find out the fixed offset of the frame pointer save area.
7779 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7780 // Allocate the frame index for the frame pointer save area.
7781 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7782 // Save the result.
7783 FI->setFramePointerSaveIndex(FPSI);
7784 }
7785 return DAG.getFrameIndex(FPSI, PtrVT);
7786}
7787
7788SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7789 SelectionDAG &DAG) const {
7790 MachineFunction &MF = DAG.getMachineFunction();
7791 // Get the inputs.
7792 SDValue Chain = Op.getOperand(0);
7793 SDValue Size = Op.getOperand(1);
7794 SDLoc dl(Op);
7795
7796 // Get the correct type for pointers.
7797 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7798 // Negate the size.
7799 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7800 DAG.getConstant(0, dl, PtrVT), Size);
7801 // Construct a node for the frame pointer save index.
7802 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7803 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7804 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7805 if (hasInlineStackProbe(MF))
7806 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7807 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7808}
7809
7810SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7811 SelectionDAG &DAG) const {
7812 MachineFunction &MF = DAG.getMachineFunction();
7813
7814 bool isPPC64 = Subtarget.isPPC64();
7815 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7816
7817 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7818 return DAG.getFrameIndex(FI, PtrVT);
7819}
7820
7821SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7822 SelectionDAG &DAG) const {
7823 SDLoc DL(Op);
7824 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7825 DAG.getVTList(MVT::i32, MVT::Other),
7826 Op.getOperand(0), Op.getOperand(1));
7827}
7828
7829SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7830 SelectionDAG &DAG) const {
7831 SDLoc DL(Op);
7832 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7833 Op.getOperand(0), Op.getOperand(1));
7834}
7835
7836SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7837 if (Op.getValueType().isVector())
7838 return LowerVectorLoad(Op, DAG);
7839
7840 assert(Op.getValueType() == MVT::i1 &&
7841 "Custom lowering only for i1 loads");
7842
7843 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7844
7845 SDLoc dl(Op);
7846 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7847
7848 SDValue Chain = LD->getChain();
7849 SDValue BasePtr = LD->getBasePtr();
7850 MachineMemOperand *MMO = LD->getMemOperand();
7851
7852 SDValue NewLD =
7853 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7854 BasePtr, MVT::i8, MMO);
7855 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7856
7857 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7858 return DAG.getMergeValues(Ops, dl);
7859}
7860
7861SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7862 if (Op.getOperand(1).getValueType().isVector())
7863 return LowerVectorStore(Op, DAG);
7864
7865 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7866 "Custom lowering only for i1 stores");
7867
7868 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7869
7870 SDLoc dl(Op);
7871 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7872
7873 SDValue Chain = ST->getChain();
7874 SDValue BasePtr = ST->getBasePtr();
7875 SDValue Value = ST->getValue();
7876 MachineMemOperand *MMO = ST->getMemOperand();
7877
7878 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7879 Value);
7880 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7881}
7882
7883// FIXME: Remove this once the ANDI glue bug is fixed:
7884SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7885 assert(Op.getValueType() == MVT::i1 &&
7886 "Custom lowering only for i1 results");
7887
7888 SDLoc DL(Op);
7889 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7890}
7891
7892SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7893 SelectionDAG &DAG) const {
7894
7895 // Implements a vector truncate that fits in a vector register as a shuffle.
7896 // We want to legalize vector truncates down to where the source fits in
7897 // a vector register (and target is therefore smaller than vector register
7898 // size). At that point legalization will try to custom lower the sub-legal
7899 // result and get here - where we can contain the truncate as a single target
7900 // operation.
7901
7902 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7903 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7904 //
7905 // We will implement it for big-endian ordering as this (where x denotes
7906 // undefined):
7907 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7908 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7909 //
7910 // The same operation in little-endian ordering will be:
7911 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7912 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
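// As a concrete (illustrative) instance: truncating v8i16 to v8i8 makes
// WideVT = v16i8 and SizeMult = 2, so the little-endian mask built below is
// <0, 2, 4, 6, 8, 10, 12, 14, u, u, u, u, u, u, u, u> and the big-endian
// mask is <1, 3, 5, 7, 9, 11, 13, 15, u, u, u, u, u, u, u, u>.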
7913
7914 EVT TrgVT = Op.getValueType();
7915 assert(TrgVT.isVector() && "Vector type expected.");
7916 unsigned TrgNumElts = TrgVT.getVectorNumElements();
7917 EVT EltVT = TrgVT.getVectorElementType();
7918 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7919 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7920 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
7921 return SDValue();
7922
7923 SDValue N1 = Op.getOperand(0);
7924 EVT SrcVT = N1.getValueType();
7925 unsigned SrcSize = SrcVT.getSizeInBits();
7926 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7927 !llvm::has_single_bit<uint32_t>(
7928 SrcVT.getVectorElementType().getSizeInBits()))
7929 return SDValue();
7930 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7931 return SDValue();
7932
7933 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7934 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7935
7936 SDLoc DL(Op);
7937 SDValue Op1, Op2;
7938 if (SrcSize == 256) {
7939 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7940 EVT SplitVT =
7941 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7942 unsigned SplitNumElts = SplitVT.getVectorNumElements();
7943 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7944 DAG.getConstant(0, DL, VecIdxTy));
7945 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7946 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7947 }
7948 else {
7949 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7950 Op2 = DAG.getUNDEF(WideVT);
7951 }
7952
7953 // First list the elements we want to keep.
7954 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7955 SmallVector<int, 16> ShuffV;
7956 if (Subtarget.isLittleEndian())
7957 for (unsigned i = 0; i < TrgNumElts; ++i)
7958 ShuffV.push_back(i * SizeMult);
7959 else
7960 for (unsigned i = 1; i <= TrgNumElts; ++i)
7961 ShuffV.push_back(i * SizeMult - 1);
7962
7963 // Populate the remaining elements with undefs.
7964 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7965 // ShuffV.push_back(i + WideNumElts);
7966 ShuffV.push_back(WideNumElts + 1);
7967
7968 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7969 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7970 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7971}
7972
7973 /// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction when
7974/// possible.
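/// The lowering leans on the fsel semantics (illustrative summary):
/// fsel FRT, FRA, FRC, FRB copies FRC to FRT when FRA >= 0.0 (with -0.0
/// treated as zero) and FRB otherwise, NaN included, which is why each
/// condition below is rewritten into a "compare against 0.0 with setge"
/// shape via subtraction and operand swaps.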
7975SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7976 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7977 EVT ResVT = Op.getValueType();
7978 EVT CmpVT = Op.getOperand(0).getValueType();
7979 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7980 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7981 SDLoc dl(Op);
7982
7983 // Without power9-vector, we don't have a native instruction for f128 comparison.
7984 // The following transformation to a libcall is needed for setcc:
7985 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
7986 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
7987 SDValue Z = DAG.getSetCC(
7988 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
7989 LHS, RHS, CC);
7990 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
7991 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
7992 }
7993
7994 // Not FP, or using SPE? Not a fsel.
7995 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
7996 Subtarget.hasSPE())
7997 return Op;
7998
7999 SDNodeFlags Flags = Op.getNode()->getFlags();
8000
8001 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8002 // presence of infinities.
8003 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8004 switch (CC) {
8005 default:
8006 break;
8007 case ISD::SETOGT:
8008 case ISD::SETGT:
8009 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8010 case ISD::SETOLT:
8011 case ISD::SETLT:
8012 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8013 }
8014 }
8015
8016 // We might be able to do better than this under some circumstances, but in
8017 // general, fsel-based lowering of select is a finite-math-only optimization.
8018 // For more information, see section F.3 of the 2.06 ISA specification.
8019 // With ISA 3.0
8020 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8021 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8022 return Op;
8023
8024 // If the RHS of the comparison is a 0.0, we don't need to do the
8025 // subtraction at all.
8026 SDValue Sel1;
8027 if (isFloatingPointZero(RHS))
8028 switch (CC) {
8029 default: break; // SETUO etc aren't handled by fsel.
8030 case ISD::SETNE:
8031 std::swap(TV, FV);
8032 [[fallthrough]];
8033 case ISD::SETEQ:
8034 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8035 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8036 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8037 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8038 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8039 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8040 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8041 case ISD::SETULT:
8042 case ISD::SETLT:
8043 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8044 [[fallthrough]];
8045 case ISD::SETOGE:
8046 case ISD::SETGE:
8047 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8048 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8049 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8050 case ISD::SETUGT:
8051 case ISD::SETGT:
8052 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8053 [[fallthrough]];
8054 case ISD::SETOLE:
8055 case ISD::SETLE:
8056 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8057 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8058 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8059 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8060 }
8061
8062 SDValue Cmp;
8063 switch (CC) {
8064 default: break; // SETUO etc aren't handled by fsel.
8065 case ISD::SETNE:
8066 std::swap(TV, FV);
8067 [[fallthrough]];
8068 case ISD::SETEQ:
8069 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8070 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8071 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8072 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8073 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8074 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8075 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8076 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8077 case ISD::SETULT:
8078 case ISD::SETLT:
8079 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8080 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8081 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8082 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8083 case ISD::SETOGE:
8084 case ISD::SETGE:
8085 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8086 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8087 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8088 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8089 case ISD::SETUGT:
8090 case ISD::SETGT:
8091 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8092 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8093 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8094 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8095 case ISD::SETOLE:
8096 case ISD::SETLE:
8097 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8098 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8099 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8100 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8101 }
8102 return Op;
8103}
8104
8105static unsigned getPPCStrictOpcode(unsigned Opc) {
8106 switch (Opc) {
8107 default:
8108 llvm_unreachable("No strict version of this opcode!");
8109 case PPCISD::FCTIDZ:
8110 return PPCISD::STRICT_FCTIDZ;
8111 case PPCISD::FCTIWZ:
8112 return PPCISD::STRICT_FCTIWZ;
8113 case PPCISD::FCTIDUZ:
8114 return PPCISD::STRICT_FCTIDUZ;
8115 case PPCISD::FCTIWUZ:
8116 return PPCISD::STRICT_FCTIWUZ;
8117 case PPCISD::FCFID:
8118 return PPCISD::STRICT_FCFID;
8119 case PPCISD::FCFIDU:
8120 return PPCISD::STRICT_FCFIDU;
8121 case PPCISD::FCFIDS:
8122 return PPCISD::STRICT_FCFIDS;
8123 case PPCISD::FCFIDUS:
8124 return PPCISD::STRICT_FCFIDUS;
8125 }
8126}
8127
8128 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8129 const PPCSubtarget &Subtarget) {
8130 SDLoc dl(Op);
8131 bool IsStrict = Op->isStrictFPOpcode();
8132 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8133 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8134
8135 // TODO: Any other flags to propagate?
8136 SDNodeFlags Flags;
8137 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8138
8139 // For strict nodes, source is the second operand.
8140 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8141 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8142 MVT DestTy = Op.getSimpleValueType();
8143 assert(Src.getValueType().isFloatingPoint() &&
8144 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8145 DestTy == MVT::i64) &&
8146 "Invalid FP_TO_INT types");
8147 if (Src.getValueType() == MVT::f32) {
8148 if (IsStrict) {
8149 Src =
8150 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8151 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8152 Chain = Src.getValue(1);
8153 } else
8154 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8155 }
8156 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8157 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8158 unsigned Opc = ISD::DELETED_NODE;
8159 switch (DestTy.SimpleTy) {
8160 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8161 case MVT::i32:
8162 Opc = IsSigned ? PPCISD::FCTIWZ
8163 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8164 break;
8165 case MVT::i64:
8166 assert((IsSigned || Subtarget.hasFPCVT()) &&
8167 "i64 FP_TO_UINT is supported only with FPCVT");
8168 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8169 }
8170 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8171 SDValue Conv;
8172 if (IsStrict) {
8173 Opc = getPPCStrictOpcode(Opc);
8174 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8175 Flags);
8176 } else {
8177 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8178 }
8179 return Conv;
8180}
8181
8182void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8183 SelectionDAG &DAG,
8184 const SDLoc &dl) const {
8185 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8186 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8187 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8188 bool IsStrict = Op->isStrictFPOpcode();
8189
8190 // Convert the FP value to an int value through memory.
8191 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8192 (IsSigned || Subtarget.hasFPCVT());
8193 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8194 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8195 MachinePointerInfo MPI =
8196 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8197
8198 // Emit a store to the stack slot.
8199 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8200 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8201 if (i32Stack) {
8202 MachineFunction &MF = DAG.getMachineFunction();
8203 Alignment = Align(4);
8204 MachineMemOperand *MMO =
8205 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8206 SDValue Ops[] = { Chain, Tmp, FIPtr };
8207 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8208 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8209 } else
8210 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8211
8212 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8213 // add in a bias on big endian.
8214 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8215 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8216 DAG.getConstant(4, dl, FIPtr.getValueType()));
8217 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8218 }
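// Illustrative note: the fctiwz result occupies the low-order word of the
// f64 image, which on a big-endian stack slot is the word at byte offset 4;
// hence the +4 adjustment above when an 8-byte store was used for an i32
// result.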
8219
8220 RLI.Chain = Chain;
8221 RLI.Ptr = FIPtr;
8222 RLI.MPI = MPI;
8223 RLI.Alignment = Alignment;
8224}
8225
8226/// Custom lowers floating point to integer conversions to use
8227/// the direct move instructions available in ISA 2.07 to avoid the
8228/// need for load/store combinations.
8229SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8230 SelectionDAG &DAG,
8231 const SDLoc &dl) const {
8232 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8233 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8234 if (Op->isStrictFPOpcode())
8235 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8236 else
8237 return Mov;
8238}
8239
8240SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8241 const SDLoc &dl) const {
8242 bool IsStrict = Op->isStrictFPOpcode();
8243 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8244 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8245 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8246 EVT SrcVT = Src.getValueType();
8247 EVT DstVT = Op.getValueType();
8248
8249 // FP to INT conversions are legal for f128.
8250 if (SrcVT == MVT::f128)
8251 return Subtarget.hasP9Vector() ? Op : SDValue();
8252
8253 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8254 // PPC (the libcall is not available).
8255 if (SrcVT == MVT::ppcf128) {
8256 if (DstVT == MVT::i32) {
8257 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8258 // set other fast-math flags to FP operations in both strict and
8259 // non-strict cases. (FP_TO_SINT, FSUB)
8260 SDNodeFlags Flags;
8261 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8262
8263 if (IsSigned) {
8264 SDValue Lo, Hi;
8265 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8266
8267 // Add the two halves of the long double in round-to-zero mode, and use
8268 // a smaller FP_TO_SINT.
8269 if (IsStrict) {
8270 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8271 DAG.getVTList(MVT::f64, MVT::Other),
8272 {Op.getOperand(0), Lo, Hi}, Flags);
8273 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8274 DAG.getVTList(MVT::i32, MVT::Other),
8275 {Res.getValue(1), Res}, Flags);
8276 } else {
8277 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8278 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8279 }
8280 } else {
8281 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8282 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8283 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8284 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8285 if (IsStrict) {
8286 // Sel = Src < 0x80000000
8287 // FltOfs = select Sel, 0.0, 0x80000000
8288 // IntOfs = select Sel, 0, 0x80000000
8289 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
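// Worked example (illustrative): for Src = 2^31, Sel is false, so
// FltOfs = 2^31 and IntOfs = 0x80000000; fp_to_sint(2^31 - 2^31) = 0 and
// 0 ^ 0x80000000 yields the expected unsigned result 2147483648.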
8290 SDValue Chain = Op.getOperand(0);
8291 EVT SetCCVT =
8292 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8293 EVT DstSetCCVT =
8294 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8295 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8296 Chain, true);
8297 Chain = Sel.getValue(1);
8298
8299 SDValue FltOfs = DAG.getSelect(
8300 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8301 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8302
8303 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8304 DAG.getVTList(SrcVT, MVT::Other),
8305 {Chain, Src, FltOfs}, Flags);
8306 Chain = Val.getValue(1);
8307 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8308 DAG.getVTList(DstVT, MVT::Other),
8309 {Chain, Val}, Flags);
8310 Chain = SInt.getValue(1);
8311 SDValue IntOfs = DAG.getSelect(
8312 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8313 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8314 return DAG.getMergeValues({Result, Chain}, dl);
8315 } else {
8316 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8317 // FIXME: generated code sucks.
8318 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8319 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8320 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8321 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8322 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8323 }
8324 }
8325 }
8326
8327 return SDValue();
8328 }
8329
8330 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8331 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8332
8333 ReuseLoadInfo RLI;
8334 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8335
8336 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8337 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8338}
8339
8340// We're trying to insert a regular store, S, and then a load, L. If the
8341// incoming value, O, is a load, we might just be able to have our load use the
8342// address used by O. However, we don't know if anything else will store to
8343// that address before we can load from it. To prevent this situation, we need
8344// to insert our load, L, into the chain as a peer of O. To do this, we give L
8345// the same chain operand as O, we create a token factor from the chain results
8346// of O and L, and we replace all uses of O's chain result with that token
8347// factor (see spliceIntoChain below for this last part).
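// Pictorially (illustrative): uses of O's chain that previously saw
//   O -> (users)
// afterwards see
//   TokenFactor(O, L) -> (users)
// so L is ordered as a sibling of O rather than after any of O's users.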
8348bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8349 ReuseLoadInfo &RLI,
8350 SelectionDAG &DAG,
8351 ISD::LoadExtType ET) const {
8352 // Conservatively skip reusing for constrained FP nodes.
8353 if (Op->isStrictFPOpcode())
8354 return false;
8355
8356 SDLoc dl(Op);
8357 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8358 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8359 if (ET == ISD::NON_EXTLOAD &&
8360 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8361 isOperationLegalOrCustom(Op.getOpcode(),
8362 Op.getOperand(0).getValueType())) {
8363
8364 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8365 return true;
8366 }
8367
8368 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8369 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8370 LD->isNonTemporal())
8371 return false;
8372 if (LD->getMemoryVT() != MemVT)
8373 return false;
8374
8375 // If the result of the load is an illegal type, then we can't build a
8376 // valid chain for reuse since the legalised loads and token factor node that
8377 // ties the legalised loads together uses a different output chain than the
8378 // illegal load.
8379 if (!isTypeLegal(LD->getValueType(0)))
8380 return false;
8381
8382 RLI.Ptr = LD->getBasePtr();
8383 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8384 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8385 "Non-pre-inc AM on PPC?");
8386 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8387 LD->getOffset());
8388 }
8389
8390 RLI.Chain = LD->getChain();
8391 RLI.MPI = LD->getPointerInfo();
8392 RLI.IsDereferenceable = LD->isDereferenceable();
8393 RLI.IsInvariant = LD->isInvariant();
8394 RLI.Alignment = LD->getAlign();
8395 RLI.AAInfo = LD->getAAInfo();
8396 RLI.Ranges = LD->getRanges();
8397
8398 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8399 return true;
8400}
8401
8402// Given the head of the old chain, ResChain, insert a token factor containing
8403// it and NewResChain, and make users of ResChain now be users of that token
8404// factor.
8405// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8406void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8407 SDValue NewResChain,
8408 SelectionDAG &DAG) const {
8409 if (!ResChain)
8410 return;
8411
8412 SDLoc dl(NewResChain);
8413
8414 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8415 NewResChain, DAG.getUNDEF(MVT::Other));
8416 assert(TF.getNode() != NewResChain.getNode() &&
8417 "A new TF really is required here");
8418
8419 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8420 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8421}
8422
8423 /// Analyze the profitability of a direct move:
8424 /// prefer a float load over an int load plus a direct move
8425 /// when there is no integer use of the int load.
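/// For instance (illustrative), in (sint_to_fp (load i32 @p)) the value can
/// be loaded straight into a VSR (e.g. lfiwax) and converted there; a GPR
/// load plus a direct move would only be preferable if the loaded integer
/// also had non-FP users.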
8426bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8427 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8428 if (Origin->getOpcode() != ISD::LOAD)
8429 return true;
8430
8431 // If there is no LXSIBZX/LXSIHZX, like Power8,
8432 // prefer direct move if the memory size is 1 or 2 bytes.
8433 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8434 if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8435 return true;
8436
8437 for (SDNode::use_iterator UI = Origin->use_begin(),
8438 UE = Origin->use_end();
8439 UI != UE; ++UI) {
8440
8441 // Only look at the users of the loaded value.
8442 if (UI.getUse().get().getResNo() != 0)
8443 continue;
8444
8445 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8446 UI->getOpcode() != ISD::UINT_TO_FP &&
8447 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8448 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8449 return true;
8450 }
8451
8452 return false;
8453}
8454
8455 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8456 const PPCSubtarget &Subtarget,
8457 SDValue Chain = SDValue()) {
8458 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8459 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8460 SDLoc dl(Op);
8461
8462 // TODO: Any other flags to propagate?
8463 SDNodeFlags Flags;
8464 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8465
8466 // If we have FCFIDS, then use it when converting to single-precision.
8467 // Otherwise, convert to double-precision and then round.
8468 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8469 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8470 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8471 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8472 if (Op->isStrictFPOpcode()) {
8473 if (!Chain)
8474 Chain = Op.getOperand(0);
8475 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8476 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8477 } else
8478 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8479}
8480
8481/// Custom lowers integer to floating point conversions to use
8482/// the direct move instructions available in ISA 2.07 to avoid the
8483/// need for load/store combinations.
8484SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8485 SelectionDAG &DAG,
8486 const SDLoc &dl) const {
8487 assert((Op.getValueType() == MVT::f32 ||
8488 Op.getValueType() == MVT::f64) &&
8489 "Invalid floating point type as target of conversion");
8490 assert(Subtarget.hasFPCVT() &&
8491 "Int to FP conversions with direct moves require FPCVT");
8492 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8493 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8494 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8495 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8496 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8497 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8498 return convertIntToFP(Op, Mov, DAG, Subtarget);
8499}
8500
8501static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8502
8503 EVT VecVT = Vec.getValueType();
8504 assert(VecVT.isVector() && "Expected a vector type.");
8505 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8506
8507 EVT EltVT = VecVT.getVectorElementType();
8508 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8509 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8510
8511 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8512 SmallVector<SDValue, 16> Ops(NumConcat);
8513 Ops[0] = Vec;
8514 SDValue UndefVec = DAG.getUNDEF(VecVT);
8515 for (unsigned i = 1; i < NumConcat; ++i)
8516 Ops[i] = UndefVec;
8517
8518 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8519}
8520
8521SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8522 const SDLoc &dl) const {
8523 bool IsStrict = Op->isStrictFPOpcode();
8524 unsigned Opc = Op.getOpcode();
8525 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8526 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8528 "Unexpected conversion type");
8529 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8530 "Supports conversions to v2f64/v4f32 only.");
8531
8532 // TODO: Any other flags to propagate?
8533 SDNodeFlags Flags;
8534 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8535
8536 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8537 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8538
8539 SDValue Wide = widenVec(DAG, Src, dl);
8540 EVT WideVT = Wide.getValueType();
8541 unsigned WideNumElts = WideVT.getVectorNumElements();
8542 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8543
8544 SmallVector<int, 16> ShuffV;
8545 for (unsigned i = 0; i < WideNumElts; ++i)
8546 ShuffV.push_back(i + WideNumElts);
8547
8548 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8549 int SaveElts = FourEltRes ? 4 : 2;
8550 if (Subtarget.isLittleEndian())
8551 for (int i = 0; i < SaveElts; i++)
8552 ShuffV[i * Stride] = i;
8553 else
8554 for (int i = 1; i <= SaveElts; i++)
8555 ShuffV[i * Stride - 1] = i - 1;
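// Illustrative example: for v4i16 -> v4f32 on little-endian, Wide is v8i16,
// WideNumElts = 8 and Stride = 2, so ShuffV becomes
// <0, 9, 1, 11, 2, 13, 3, 15>: each 32-bit lane of the intermediate vector
// pairs a source element with an element of the zero/undef second operand.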
8556
8557 SDValue ShuffleSrc2 =
8558 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8559 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8560
8561 SDValue Extend;
8562 if (SignedConv) {
8563 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8564 EVT ExtVT = Src.getValueType();
8565 if (Subtarget.hasP9Altivec())
8566 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8567 IntermediateVT.getVectorNumElements());
8568
8569 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8570 DAG.getValueType(ExtVT));
8571 } else
8572 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8573
8574 if (IsStrict)
8575 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8576 {Op.getOperand(0), Extend}, Flags);
8577
8578 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8579}
8580
8581SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8582 SelectionDAG &DAG) const {
8583 SDLoc dl(Op);
8584 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8585 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8586 bool IsStrict = Op->isStrictFPOpcode();
8587 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8588 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8589
8590 // TODO: Any other flags to propagate?
8591 SDNodeFlags Flags;
8592 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8593
8594 EVT InVT = Src.getValueType();
8595 EVT OutVT = Op.getValueType();
8596 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8597 isOperationCustom(Op.getOpcode(), InVT))
8598 return LowerINT_TO_FPVector(Op, DAG, dl);
8599
8600 // Conversions to f128 are legal.
8601 if (Op.getValueType() == MVT::f128)
8602 return Subtarget.hasP9Vector() ? Op : SDValue();
8603
8604 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8605 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8606 return SDValue();
8607
8608 if (Src.getValueType() == MVT::i1) {
8609 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8610 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8611 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8612 if (IsStrict)
8613 return DAG.getMergeValues({Sel, Chain}, dl);
8614 else
8615 return Sel;
8616 }
8617
8618 // If we have direct moves, we can do all the conversion, skip the store/load
8619 // however, without FPCVT we can't do most conversions.
8620 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8621 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8622 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8623
8624 assert((IsSigned || Subtarget.hasFPCVT()) &&
8625 "UINT_TO_FP is supported only with FPCVT");
8626
8627 if (Src.getValueType() == MVT::i64) {
8628 SDValue SINT = Src;
8629 // When converting to single-precision, we actually need to convert
8630 // to double-precision first and then round to single-precision.
8631 // To avoid double-rounding effects during that operation, we have
8632 // to prepare the input operand. Bits that might be truncated when
8633 // converting to double-precision are replaced by a bit that won't
8634 // be lost at this stage, but is below the single-precision rounding
8635 // position.
8636 //
8637 // However, if -enable-unsafe-fp-math is in effect, accept double
8638 // rounding to avoid the extra overhead.
8639 if (Op.getValueType() == MVT::f32 &&
8640 !Subtarget.hasFPCVT() &&
8641 !DAG.getTarget().Options.UnsafeFPMath) {
8642
8643 // Twiddle input to make sure the low 11 bits are zero. (If this
8644 // is the case, we are guaranteed the value will fit into the 53 bit
8645 // mantissa of an IEEE double-precision value without rounding.)
8646 // If any of those low 11 bits were not zero originally, make sure
8647 // bit 12 (value 2048) is set instead, so that the final rounding
8648 // to single-precision gets the correct result.
8649 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8650 SINT, DAG.getConstant(2047, dl, MVT::i64));
8651 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8652 Round, DAG.getConstant(2047, dl, MVT::i64));
8653 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8654 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8655 Round, DAG.getConstant(-2048, dl, MVT::i64));
8656
8657 // However, we cannot use that value unconditionally: if the magnitude
8658 // of the input value is small, the bit-twiddling we did above might
8659 // end up visibly changing the output. Fortunately, in that case, we
8660 // don't need to twiddle bits since the original input will convert
8661 // exactly to double-precision floating-point already. Therefore,
8662 // construct a conditional to use the original value if the top 11
8663 // bits are all sign-bit copies, and use the rounded value computed
8664 // above otherwise.
8665 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8666 SINT, DAG.getConstant(53, dl, MVT::i32));
8667 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8668 Cond, DAG.getConstant(1, dl, MVT::i64));
8669 Cond = DAG.getSetCC(
8670 dl,
8671 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8672 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8673
8674 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8675 }
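// A worked example of the twiddling above, for SINT = 0x1003 (low 11 bits
// are 0x003, i.e. not all zero):
//   0x1003 & 2047 = 0x003; 0x003 + 2047 = 0x802;
//   0x802 | 0x1003 = 0x1803; 0x1803 & -2048 = 0x1800.
// The low 11 bits are cleared and bit 11 (value 2048) is forced on because
// they were non-zero; when the low 11 bits are already zero, the same four
// steps return SINT unchanged. The SRA/ADD/SETUGT sequence above selects
// Round only when the top 11 bits of SINT are not all sign-bit copies,
// i.e. only when truncation could actually occur.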
8676
8677 ReuseLoadInfo RLI;
8678 SDValue Bits;
8679
8680 MachineFunction &MF = DAG.getMachineFunction();
8681 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8682 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8683 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8684 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8685 } else if (Subtarget.hasLFIWAX() &&
8686 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8687 MachineMemOperand *MMO =
8688 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8689 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8690 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8691 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8692 DAG.getVTList(MVT::f64, MVT::Other),
8693 Ops, MVT::i32, MMO);
8694 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8695 } else if (Subtarget.hasFPCVT() &&
8696 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8697 MachineMemOperand *MMO =
8698 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8699 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8700 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8701 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8702 DAG.getVTList(MVT::f64, MVT::Other),
8703 Ops, MVT::i32, MMO);
8704 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8705 } else if (((Subtarget.hasLFIWAX() &&
8706 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8707 (Subtarget.hasFPCVT() &&
8708 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8709 SINT.getOperand(0).getValueType() == MVT::i32) {
8710 MachineFrameInfo &MFI = MF.getFrameInfo();
8711 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8712
8713 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8714 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8715
8716 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8717 MachinePointerInfo::getFixedStack(
8718 DAG.getMachineFunction(), FrameIdx));
8719 Chain = Store;
8720
8721 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8722 "Expected an i32 store");
8723
8724 RLI.Ptr = FIdx;
8725 RLI.Chain = Chain;
8726 RLI.MPI =
8727 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8728 RLI.Alignment = Align(4);
8729
8730 MachineMemOperand *MMO =
8731 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8732 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8733 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8734 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8735 PPCISD::LFIWZX : PPCISD::LFIWAX,
8736 dl, DAG.getVTList(MVT::f64, MVT::Other),
8737 Ops, MVT::i32, MMO);
8738 Chain = Bits.getValue(1);
8739 } else
8740 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8741
8742 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8743 if (IsStrict)
8744 Chain = FP.getValue(1);
8745
8746 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8747 if (IsStrict)
8748 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8749 DAG.getVTList(MVT::f32, MVT::Other),
8750 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8751 else
8752 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8753 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8754 }
8755 return FP;
8756 }
8757
8758 assert(Src.getValueType() == MVT::i32 &&
8759 "Unhandled INT_TO_FP type in custom expander!");
8760 // Since we only generate this in 64-bit mode, we can take advantage of
8761 // 64-bit registers. In particular, sign extend the input value into the
8762 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8763 // then lfd it and fcfid it.
8764 MachineFunction &MF = DAG.getMachineFunction();
8765 MachineFrameInfo &MFI = MF.getFrameInfo();
8766 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8767
8768 SDValue Ld;
8769 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8770 ReuseLoadInfo RLI;
8771 bool ReusingLoad;
8772 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8773 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8774 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8775
8776 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8777 MachinePointerInfo::getFixedStack(
8778 DAG.getMachineFunction(), FrameIdx));
8779 Chain = Store;
8780
8781 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8782 "Expected an i32 store");
8783
8784 RLI.Ptr = FIdx;
8785 RLI.Chain = Chain;
8786 RLI.MPI =
8787 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8788 RLI.Alignment = Align(4);
8789 }
8790
8791 MachineMemOperand *MMO =
8792 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8793 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8794 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8795 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8796 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8797 MVT::i32, MMO);
8798 Chain = Ld.getValue(1);
8799 if (ReusingLoad)
8800 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8801 } else {
8802 assert(Subtarget.isPPC64() &&
8803 "i32->FP without LFIWAX supported only on PPC64");
8804
8805 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8806 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8807
8808 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8809
8810 // STD the extended value into the stack slot.
8811 SDValue Store = DAG.getStore(
8812 Chain, dl, Ext64, FIdx,
8813 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8814 Chain = Store;
8815
8816 // Load the value as a double.
8817 Ld = DAG.getLoad(
8818 MVT::f64, dl, Chain, FIdx,
8819 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8820 Chain = Ld.getValue(1);
8821 }
8822
8823 // FCFID it and return it.
8824 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8825 if (IsStrict)
8826 Chain = FP.getValue(1);
8827 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8828 if (IsStrict)
8829 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8830 DAG.getVTList(MVT::f32, MVT::Other),
8831 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8832 else
8833 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8834 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8835 }
8836 return FP;
8837}
8838
8839SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
8840 SelectionDAG &DAG) const {
8841 SDLoc dl(Op);
8842 /*
8843 The rounding mode is in bits 30:31 of FPSCR, and has the following
8844 settings:
8845 00 Round to nearest
8846 01 Round to 0
8847 10 Round to +inf
8848 11 Round to -inf
8849
8850 GET_ROUNDING, on the other hand, expects the following:
8851 -1 Undefined
8852 0 Round to 0
8853 1 Round to nearest
8854 2 Round to +inf
8855 3 Round to -inf
8856
8857 To perform the conversion, we do:
8858 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8859 */
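// Tabulating the expression for each rounding-mode value RN = FPSCR & 3
// (result = RN ^ ((~RN & 3) >> 1)) confirms the mapping above:
//   RN = 0 (nearest) -> 0 ^ 1 = 1
//   RN = 1 (to zero) -> 1 ^ 1 = 0
//   RN = 2 (to +inf) -> 2 ^ 0 = 2
//   RN = 3 (to -inf) -> 3 ^ 0 = 3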
8860
8861 MachineFunction &MF = DAG.getMachineFunction();
8862 EVT VT = Op.getValueType();
8863 EVT PtrVT = getPointerTy(MF.getDataLayout());
8864
8865 // Save FP Control Word to register
8866 SDValue Chain = Op.getOperand(0);
8867 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8868 Chain = MFFS.getValue(1);
8869
8870 SDValue CWD;
8871 if (isTypeLegal(MVT::i64)) {
8872 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8873 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8874 } else {
8875 // Save FP register to stack slot
8876 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8877 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8878 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8879
8880 // Load FP Control Word from low 32 bits of stack slot.
8882 "Stack slot adjustment is valid only on big endian subtargets!");
8883 SDValue Four = DAG.getConstant(4, dl, PtrVT);
8884 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8885 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8886 Chain = CWD.getValue(1);
8887 }
8888
8889 // Transform as necessary
8890 SDValue CWD1 =
8891 DAG.getNode(ISD::AND, dl, MVT::i32,
8892 CWD, DAG.getConstant(3, dl, MVT::i32));
8893 SDValue CWD2 =
8894 DAG.getNode(ISD::SRL, dl, MVT::i32,
8895 DAG.getNode(ISD::AND, dl, MVT::i32,
8896 DAG.getNode(ISD::XOR, dl, MVT::i32,
8897 CWD, DAG.getConstant(3, dl, MVT::i32)),
8898 DAG.getConstant(3, dl, MVT::i32)),
8899 DAG.getConstant(1, dl, MVT::i32));
8900
8901 SDValue RetVal =
8902 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8903
8904 RetVal =
8905 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8906 dl, VT, RetVal);
8907
8908 return DAG.getMergeValues({RetVal, Chain}, dl);
8909}
8910
8911SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8912 EVT VT = Op.getValueType();
8913 unsigned BitWidth = VT.getSizeInBits();
8914 SDLoc dl(Op);
8915 assert(Op.getNumOperands() == 3 &&
8916 VT == Op.getOperand(1).getValueType() &&
8917 "Unexpected SHL!");
8918
8919 // Expand into a bunch of logical ops. Note that these ops
8920 // depend on the PPC behavior for oversized shift amounts.
8921 SDValue Lo = Op.getOperand(0);
8922 SDValue Hi = Op.getOperand(1);
8923 SDValue Amt = Op.getOperand(2);
8924 EVT AmtVT = Amt.getValueType();
8925
8926 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8927 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8928 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8929 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8930 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8931 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8932 DAG.getConstant(-BitWidth, dl, AmtVT));
8933 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8934 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8935 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8936 SDValue OutOps[] = { OutLo, OutHi };
8937 return DAG.getMergeValues(OutOps, dl);
8938}
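// As an illustration of the oversized-shift behaviour these ops rely on,
// take BitWidth = 32 and Amt = 40. Then Tmp1 = 32 - 40 = -8, which is 56
// modulo 64, so the PPCISD::SRL of Lo by Tmp1 yields 0; Hi << 40 is
// likewise 0; and Tmp5 = 40 - 32 = 8 gives Tmp6 = Lo << 8. The result is
// OutHi = Lo << 8 and OutLo = 0, the correct value of a 64-bit pair shifted
// left by 40 (32-bit PPC shifts by 32..63 produce 0 rather than being
// undefined, which is exactly what this expansion depends on).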
8939
8940SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8941 EVT VT = Op.getValueType();
8942 SDLoc dl(Op);
8943 unsigned BitWidth = VT.getSizeInBits();
8944 assert(Op.getNumOperands() == 3 &&
8945 VT == Op.getOperand(1).getValueType() &&
8946 "Unexpected SRL!");
8947
8948 // Expand into a bunch of logical ops. Note that these ops
8949 // depend on the PPC behavior for oversized shift amounts.
8950 SDValue Lo = Op.getOperand(0);
8951 SDValue Hi = Op.getOperand(1);
8952 SDValue Amt = Op.getOperand(2);
8953 EVT AmtVT = Amt.getValueType();
8954
8955 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8956 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8957 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8958 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8959 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8960 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8961 DAG.getConstant(-BitWidth, dl, AmtVT));
8962 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8963 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8964 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8965 SDValue OutOps[] = { OutLo, OutHi };
8966 return DAG.getMergeValues(OutOps, dl);
8967}
8968
8969SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8970 SDLoc dl(Op);
8971 EVT VT = Op.getValueType();
8972 unsigned BitWidth = VT.getSizeInBits();
8973 assert(Op.getNumOperands() == 3 &&
8974 VT == Op.getOperand(1).getValueType() &&
8975 "Unexpected SRA!");
8976
8977 // Expand into a bunch of logical ops, followed by a select_cc.
8978 SDValue Lo = Op.getOperand(0);
8979 SDValue Hi = Op.getOperand(1);
8980 SDValue Amt = Op.getOperand(2);
8981 EVT AmtVT = Amt.getValueType();
8982
8983 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8984 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8985 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8986 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8987 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8988 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8989 DAG.getConstant(-BitWidth, dl, AmtVT));
8990 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8991 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8992 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8993 Tmp4, Tmp6, ISD::SETLE);
8994 SDValue OutOps[] = { OutLo, OutHi };
8995 return DAG.getMergeValues(OutOps, dl);
8996}
8997
8998SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8999 SelectionDAG &DAG) const {
9000 SDLoc dl(Op);
9001 EVT VT = Op.getValueType();
9002 unsigned BitWidth = VT.getSizeInBits();
9003
9004 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9005 SDValue X = Op.getOperand(0);
9006 SDValue Y = Op.getOperand(1);
9007 SDValue Z = Op.getOperand(2);
9008 EVT AmtVT = Z.getValueType();
9009
9010 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9011 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9012 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9013 // on PowerPC shift by BW being well defined.
9014 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9015 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9016 SDValue SubZ =
9017 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9018 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9019 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9020 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9021}
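// For illustration: with BitWidth = 64, fshl(X, Y, 8) lowers to
// (X << 8) | (Y >> 56), and fshl(X, Y, 0) lowers to (X << 0) | (Y >> 64);
// the latter is only correct because a PPC shift by exactly BitWidth
// produces 0, so the result degenerates to X as required.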
9022
9023//===----------------------------------------------------------------------===//
9024// Vector related lowering.
9025//
9026
9027/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9028/// element size of SplatSize. Cast the result to VT.
9029static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9030 SelectionDAG &DAG, const SDLoc &dl) {
9031 static const MVT VTys[] = { // canonical VT to use for each size.
9032 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9033 };
9034
9035 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9036
9037 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9038 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9039 SplatSize = 1;
9040 Val = 0xFF;
9041 }
9042
9043 EVT CanonicalVT = VTys[SplatSize-1];
9044
9045 // Build a canonical splat for this value.
9046 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9047}
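// For illustration: a v8i16 splat of 0xFFFF satisfies the all-ones check
// above (0xFFFF == (1 << 16) - 1), so it is rebuilt as a one-byte splat of
// 0xFF and emitted as the canonical all-ones vector, vspltisb -1.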
9048
9049/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9050/// specified intrinsic ID.
9051static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9052 const SDLoc &dl, EVT DestVT = MVT::Other) {
9053 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9054 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9055 DAG.getConstant(IID, dl, MVT::i32), Op);
9056}
9057
9058/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9059/// specified intrinsic ID.
9060static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9061 SelectionDAG &DAG, const SDLoc &dl,
9062 EVT DestVT = MVT::Other) {
9063 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9064 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9065 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9066}
9067
9068/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9069/// specified intrinsic ID.
9070static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9071 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9072 EVT DestVT = MVT::Other) {
9073 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9074 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9075 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9076}
9077
9078/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9079/// amount. The result has the specified value type.
9080static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9081 SelectionDAG &DAG, const SDLoc &dl) {
9082 // Force LHS/RHS to be the right type.
9083 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9084 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9085
9086 int Ops[16];
9087 for (unsigned i = 0; i != 16; ++i)
9088 Ops[i] = i + Amt;
9089 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9090 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9091}
9092
9093/// Do we have an efficient pattern in a .td file for this node?
9094///
9095/// \param V - pointer to the BuildVectorSDNode being matched
9096/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9097///
9098/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9099/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9100/// the opposite is true (expansion is beneficial) are:
9101/// - The node builds a vector out of integers that are not 32 or 64-bits
9102/// - The node builds a vector out of constants
9103/// - The node is a "load-and-splat"
9104/// In all other cases, we will choose to keep the BUILD_VECTOR.
9105static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9106 bool HasDirectMove,
9107 bool HasP8Vector) {
9108 EVT VecVT = V->getValueType(0);
9109 bool RightType = VecVT == MVT::v2f64 ||
9110 (HasP8Vector && VecVT == MVT::v4f32) ||
9111 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9112 if (!RightType)
9113 return false;
9114
9115 bool IsSplat = true;
9116 bool IsLoad = false;
9117 SDValue Op0 = V->getOperand(0);
9118
9119 // This function is called in a block that confirms the node is not a constant
9120 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9121 // different constants.
9122 if (V->isConstant())
9123 return false;
9124 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9125 if (V->getOperand(i).isUndef())
9126 return false;
9127 // We want to expand nodes that represent load-and-splat even if the
9128 // loaded value is a floating point truncation or conversion to int.
9129 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9130 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9131 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9132 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9133 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9134 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9135 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9136 IsLoad = true;
9137 // If the operands are different or the input is not a load and has more
9138 // uses than just this BV node, then it isn't a splat.
9139 if (V->getOperand(i) != Op0 ||
9140 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9141 IsSplat = false;
9142 }
9143 return !(IsSplat && IsLoad);
9144}
9145
9146// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9147SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9148
9149 SDLoc dl(Op);
9150 SDValue Op0 = Op->getOperand(0);
9151
9152 if ((Op.getValueType() != MVT::f128) ||
9153 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9154 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9155 (Op0.getOperand(1).getValueType() != MVT::i64))
9156 return SDValue();
9157
9158 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9159 Op0.getOperand(1));
9160}
9161
9162static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9163 const SDValue *InputLoad = &Op;
9164 while (InputLoad->getOpcode() == ISD::BITCAST)
9165 InputLoad = &InputLoad->getOperand(0);
9166 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9167 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9168 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9169 InputLoad = &InputLoad->getOperand(0);
9170 }
9171 if (InputLoad->getOpcode() != ISD::LOAD)
9172 return nullptr;
9173 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9174 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9175}
9176
9177// Convert the argument APFloat to a single precision APFloat if there is no
9178// loss in information during the conversion to single precision APFloat and the
9179// resulting number is not a denormal number. Return true if successful.
9180bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9181 APFloat APFloatToConvert = ArgAPFloat;
9182 bool LosesInfo = true;
9183 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9184 &LosesInfo);
9185 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9186 if (Success)
9187 ArgAPFloat = APFloatToConvert;
9188 return Success;
9189}
9190
9191// Bitcast the argument APInt to a double and convert it to a single precision
9192// APFloat, bitcast the APFloat to an APInt and assign it to the original
9193// argument if there is no loss in information during the conversion from
9194// double to single precision APFloat and the resulting number is not a denormal
9195// number. Return true if successful.
9196bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9197 double DpValue = ArgAPInt.bitsToDouble();
9198 APFloat APFloatDp(DpValue);
9199 bool Success = convertToNonDenormSingle(APFloatDp);
9200 if (Success)
9201 ArgAPInt = APFloatDp.bitcastToAPInt();
9202 return Success;
9203}
9204
9205 // Nondestructive check for convertToNonDenormSingle.
9206bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9207 // Only convert if it loses info, since XXSPLTIDP should
9208 // handle the other case.
9209 APFloat APFloatToConvert = ArgAPFloat;
9210 bool LosesInfo = true;
9211 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9212 &LosesInfo);
9213
9214 return (!LosesInfo && !APFloatToConvert.isDenormal());
9215}
9216
9217static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9218 unsigned &Opcode) {
9219 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9220 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9221 return false;
9222
9223 EVT Ty = Op->getValueType(0);
9224 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9225 // as we cannot handle extending loads for these types.
9226 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9227 ISD::isNON_EXTLoad(InputNode))
9228 return true;
9229
9230 EVT MemVT = InputNode->getMemoryVT();
9231 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9232 // memory VT is the same vector element VT type.
9233 // The loads feeding into the v8i16 and v16i8 types will be extending because
9234 // scalar i8/i16 are not legal types.
9235 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9236 (MemVT == Ty.getVectorElementType()))
9237 return true;
9238
9239 if (Ty == MVT::v2i64) {
9240 // Check the extend type, when the input type is i32, and the output vector
9241 // type is v2i64.
9242 if (MemVT == MVT::i32) {
9243 if (ISD::isZEXTLoad(InputNode))
9244 Opcode = PPCISD::ZEXT_LD_SPLAT;
9245 if (ISD::isSEXTLoad(InputNode))
9246 Opcode = PPCISD::SEXT_LD_SPLAT;
9247 }
9248 return true;
9249 }
9250 return false;
9251}
9252
9253// If this is a case we can't handle, return null and let the default
9254// expansion code take care of it. If we CAN select this case, and if it
9255// selects to a single instruction, return Op. Otherwise, if we can codegen
9256// this case more efficiently than a constant pool load, lower it to the
9257// sequence of ops that should be used.
9258SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9259 SelectionDAG &DAG) const {
9260 SDLoc dl(Op);
9261 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9262 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9263
9264 // Check if this is a splat of a constant value.
9265 APInt APSplatBits, APSplatUndef;
9266 unsigned SplatBitSize;
9267 bool HasAnyUndefs;
9268 bool BVNIsConstantSplat =
9269 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9270 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9271
9272 // If it is a splat of a double, check if we can shrink it to a 32 bit
9273 // non-denormal float which when converted back to double gives us the same
9274 // double. This is to exploit the XXSPLTIDP instruction.
9275 // If we lose precision, we use XXSPLTI32DX.
9276 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9277 Subtarget.hasPrefixInstrs()) {
9278 // Check the type first to short-circuit so we don't modify APSplatBits if
9279 // this block isn't executed.
9280 if ((Op->getValueType(0) == MVT::v2f64) &&
9281 convertToNonDenormSingle(APSplatBits)) {
9282 SDValue SplatNode = DAG.getNode(
9283 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9284 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9285 return DAG.getBitcast(Op.getValueType(), SplatNode);
9286 } else {
9287 // We may lose precision, so we have to use XXSPLTI32DX.
9288
9289 uint32_t Hi =
9290 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9291 uint32_t Lo =
9292 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9293 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9294
9295 if (!Hi || !Lo)
9296 // If either load is 0, then we should generate XXLXOR to set to 0.
9297 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9298
9299 if (Hi)
9300 SplatNode = DAG.getNode(
9301 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9302 DAG.getTargetConstant(0, dl, MVT::i32),
9303 DAG.getTargetConstant(Hi, dl, MVT::i32));
9304
9305 if (Lo)
9306 SplatNode =
9307 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9308 DAG.getTargetConstant(1, dl, MVT::i32),
9309 DAG.getTargetConstant(Lo, dl, MVT::i32));
9310
9311 return DAG.getBitcast(Op.getValueType(), SplatNode);
9312 }
9313 }
9314
9315 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9316 unsigned NewOpcode = PPCISD::LD_SPLAT;
9317
9318 // Handle load-and-splat patterns as we have instructions that will do this
9319 // in one go.
9320 if (DAG.isSplatValue(Op, true) &&
9321 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9322 const SDValue *InputLoad = &Op.getOperand(0);
9323 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9324
9325 // If the input load is an extending load, it will be an i32 -> i64
9326 // extending load and isValidSplatLoad() will update NewOpcode.
9327 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9328 unsigned ElementSize =
9329 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9330
9331 assert(((ElementSize == 2 * MemorySize)
9332 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9333 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9334 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9335 "Unmatched element size and opcode!\n");
9336
9337 // Checking for a single use of this load, we have to check for vector
9338 // width (128 bits) / ElementSize uses (since each operand of the
9339 // BUILD_VECTOR is a separate use of the value).
9340 unsigned NumUsesOfInputLD = 128 / ElementSize;
9341 for (SDValue BVInOp : Op->ops())
9342 if (BVInOp.isUndef())
9343 NumUsesOfInputLD--;
9344
9345 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9346 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9347 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9348 // 15", but function IsValidSplatLoad() now will only return true when
9349 // the data at index 0 is not nullptr. So we will not get into trouble for
9350 // these cases.
9351 //
9352 // case 1 - lfiwzx/lfiwax
9353 // 1.1: load result is i32 and is sign/zero extend to i64;
9354 // 1.2: build a v2i64 vector type with above loaded value;
9355 // 1.3: the vector has only one value at index 0, others are all undef;
9356 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9357 if (NumUsesOfInputLD == 1 &&
9358 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9359 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9360 Subtarget.hasLFIWAX()))
9361 return SDValue();
9362
9363 // case 2 - lxvr[hb]x
9364 // 2.1: load result is at most i16;
9365 // 2.2: build a vector with above loaded value;
9366 // 2.3: the vector has only one value at index 0, others are all undef;
9367 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9368 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9369 Subtarget.isISA3_1() && ElementSize <= 16)
9370 return SDValue();
9371
9372 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9373 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9374 Subtarget.hasVSX()) {
9375 SDValue Ops[] = {
9376 LD->getChain(), // Chain
9377 LD->getBasePtr(), // Ptr
9378 DAG.getValueType(Op.getValueType()) // VT
9379 };
9380 SDValue LdSplt = DAG.getMemIntrinsicNode(
9381 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9382 LD->getMemoryVT(), LD->getMemOperand());
9383 // Replace all uses of the output chain of the original load with the
9384 // output chain of the new load.
9385 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9386 LdSplt.getValue(1));
9387 return LdSplt;
9388 }
9389 }
9390
9391 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9392 // 32-bits can be lowered to VSX instructions under certain conditions.
9393 // Without VSX, there is no pattern more efficient than expanding the node.
9394 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9395 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9396 Subtarget.hasP8Vector()))
9397 return Op;
9398 return SDValue();
9399 }
9400
9401 uint64_t SplatBits = APSplatBits.getZExtValue();
9402 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9403 unsigned SplatSize = SplatBitSize / 8;
9404
9405 // First, handle single instruction cases.
9406
9407 // All zeros?
9408 if (SplatBits == 0) {
9409 // Canonicalize all zero vectors to be v4i32.
9410 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9411 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9412 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9413 }
9414 return Op;
9415 }
9416
9417 // We have XXSPLTIW for constant splats four bytes wide.
9418 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9419 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9420 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9421 // turned into a 4-byte splat of 0xABABABAB.
9422 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9423 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9424 Op.getValueType(), DAG, dl);
9425
9426 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9427 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9428 dl);
9429
9430 // We have XXSPLTIB for constant splats one byte wide.
9431 if (Subtarget.hasP9Vector() && SplatSize == 1)
9432 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9433 dl);
9434
9435 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9436 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9437 (32-SplatBitSize));
9438 if (SextVal >= -16 && SextVal <= 15)
9439 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9440 dl);
9441
9442 // Two instruction sequences.
9443
9444 // If this value is in the range [-32,30] and is even, use:
9445 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9446 // If this value is in the range [17,31] and is odd, use:
9447 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9448 // If this value is in the range [-31,-17] and is odd, use:
9449 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9450 // Note the last two are three-instruction sequences.
9451 if (SextVal >= -32 && SextVal <= 31) {
9452 // To avoid having these optimizations undone by constant folding,
9453 // we convert to a pseudo that will be expanded later into one of
9454 // the above forms.
9455 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9456 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9457 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9458 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9459 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9460 if (VT == Op.getValueType())
9461 return RetVal;
9462 else
9463 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9464 }
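// Concrete instances of the three forms above:
//   SextVal = 24 (even, in [-32,30]): vspltisb(12) + vspltisb(12)
//   SextVal = 27 (odd, in [17,31]): vspltisb(11) - vspltisb(-16)
//   SextVal = -27 (odd, in [-31,-17]): vspltisb(-11) + vspltisb(-16)
// Emitting the VADD_SPLAT pseudo here keeps constant folding from
// collapsing these cheap sequences back into a constant-pool load.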
9465
9466 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9467 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9468 // for fneg/fabs.
9469 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9470 // Make -1 and vspltisw -1:
9471 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9472
9473 // Make the VSLW intrinsic, computing 0x8000_0000.
9474 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9475 OnesV, DAG, dl);
9476
9477 // xor by OnesV to invert it.
9478 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9479 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9480 }
9481
9482 // Check to see if this is a wide variety of vsplti*, binop self cases.
9483 static const signed char SplatCsts[] = {
9484 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9485 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9486 };
9487
9488 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9489 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9490 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9491 int i = SplatCsts[idx];
9492
9493 // Figure out what shift amount will be used by altivec if shifted by i in
9494 // this splat size.
9495 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9496
9497 // vsplti + shl self.
9498 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9499 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9500 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9501 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9502 Intrinsic::ppc_altivec_vslw
9503 };
9504 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9505 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9506 }
9507
9508 // vsplti + srl self.
9509 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9510 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9511 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9512 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9513 Intrinsic::ppc_altivec_vsrw
9514 };
9515 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9516 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9517 }
9518
9519 // vsplti + rol self.
9520 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9521 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9522 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9523 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9524 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9525 Intrinsic::ppc_altivec_vrlw
9526 };
9527 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9528 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9529 }
9530
9531 // t = vsplti c, result = vsldoi t, t, 1
9532 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9533 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9534 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9535 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9536 }
9537 // t = vsplti c, result = vsldoi t, t, 2
9538 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9539 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9540 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9541 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9542 }
9543 // t = vsplti c, result = vsldoi t, t, 3
9544 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9545 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9546 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9547 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9548 }
9549 }
9550
9551 return SDValue();
9552}
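// One concrete instance of the "vsplti + shl self" case above: a v8i16
// splat of 0x0800 matches i = 8 with TypeShiftAmt = 8 (SextVal == 8 << 8),
// so it is emitted as t = vspltish(8) followed by vslh(t, t), where each
// halfword becomes 8 << 8 = 0x0800 in just two instructions.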
9553
9554/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9555/// the specified operations to build the shuffle.
9557 SDValue RHS, SelectionDAG &DAG,
9558 const SDLoc &dl) {
9559 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9560 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9561 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9562
9563 enum {
9564 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9565 OP_VMRGHW,
9566 OP_VMRGLW,
9567 OP_VSPLTISW0,
9568 OP_VSPLTISW1,
9569 OP_VSPLTISW2,
9570 OP_VSPLTISW3,
9571 OP_VSLDOI4,
9572 OP_VSLDOI8,
9573 OP_VSLDOI12
9574 };
9575
9576 if (OpNum == OP_COPY) {
9577 if (LHSID == (1*9+2)*9+3) return LHS;
9578 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9579 return RHS;
9580 }
9581
9582 SDValue OpLHS, OpRHS;
9583 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9584 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9585
9586 int ShufIdxs[16];
9587 switch (OpNum) {
9588 default: llvm_unreachable("Unknown i32 permute!");
9589 case OP_VMRGHW:
9590 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9591 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9592 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9593 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9594 break;
9595 case OP_VMRGLW:
9596 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9597 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9598 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9599 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9600 break;
9601 case OP_VSPLTISW0:
9602 for (unsigned i = 0; i != 16; ++i)
9603 ShufIdxs[i] = (i&3)+0;
9604 break;
9605 case OP_VSPLTISW1:
9606 for (unsigned i = 0; i != 16; ++i)
9607 ShufIdxs[i] = (i&3)+4;
9608 break;
9609 case OP_VSPLTISW2:
9610 for (unsigned i = 0; i != 16; ++i)
9611 ShufIdxs[i] = (i&3)+8;
9612 break;
9613 case OP_VSPLTISW3:
9614 for (unsigned i = 0; i != 16; ++i)
9615 ShufIdxs[i] = (i&3)+12;
9616 break;
9617 case OP_VSLDOI4:
9618 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9619 case OP_VSLDOI8:
9620 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9621 case OP_VSLDOI12:
9622 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9623 }
9624 EVT VT = OpLHS.getValueType();
9625 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9626 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9627 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9628 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9629}
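// Note on the decoding above: LHSID and RHSID each pack four source-element
// indices into 13 bits as base-9 digits (9^4 = 6561 < 8192), which is why
// the identity inputs check against (1*9+2)*9+3 for <0,1,2,3> and
// ((4*9+5)*9+6)*9+7 for <4,5,6,7> in the OP_COPY case.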
9630
9631/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9632/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9633/// SDValue.
9634SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9635 SelectionDAG &DAG) const {
9636 const unsigned BytesInVector = 16;
9637 bool IsLE = Subtarget.isLittleEndian();
9638 SDLoc dl(N);
9639 SDValue V1 = N->getOperand(0);
9640 SDValue V2 = N->getOperand(1);
9641 unsigned ShiftElts = 0, InsertAtByte = 0;
9642 bool Swap = false;
9643
9644 // Shifts required to get the byte we want at element 7.
9645 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9646 0, 15, 14, 13, 12, 11, 10, 9};
9647 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9648 1, 2, 3, 4, 5, 6, 7, 8};
9649
9650 ArrayRef<int> Mask = N->getMask();
9651 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9652
9653 // For each mask element, find out if we're just inserting something
9654 // from V2 into V1 or vice versa.
9655 // Possible permutations inserting an element from V2 into V1:
9656 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9657 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9658 // ...
9659 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9660 // Inserting from V1 into V2 will be similar, except mask range will be
9661 // [16,31].
9662
9663 bool FoundCandidate = false;
9664 // If both vector operands for the shuffle are the same vector, the mask
9665 // will contain only elements from the first one and the second one will be
9666 // undef.
9667 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9668 // Go through the mask of half-words to find an element that's being moved
9669 // from one vector to the other.
9670 for (unsigned i = 0; i < BytesInVector; ++i) {
9671 unsigned CurrentElement = Mask[i];
9672 // If 2nd operand is undefined, we should only look for element 7 in the
9673 // Mask.
9674 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9675 continue;
9676
9677 bool OtherElementsInOrder = true;
9678 // Examine the other elements in the Mask to see if they're in original
9679 // order.
9680 for (unsigned j = 0; j < BytesInVector; ++j) {
9681 if (j == i)
9682 continue;
9683 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9684 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9685 // in which case we assume we're always picking from the 1st operand.
9686 int MaskOffset =
9687 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9688 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9689 OtherElementsInOrder = false;
9690 break;
9691 }
9692 }
9693 // If other elements are in original order, we record the number of shifts
9694 // we need to get the element we want into element 7. Also record which byte
9695 // in the vector we should insert into.
9696 if (OtherElementsInOrder) {
9697 // If 2nd operand is undefined, we assume no shifts and no swapping.
9698 if (V2.isUndef()) {
9699 ShiftElts = 0;
9700 Swap = false;
9701 } else {
9702 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9703 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9704 : BigEndianShifts[CurrentElement & 0xF];
9705 Swap = CurrentElement < BytesInVector;
9706 }
9707 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9708 FoundCandidate = true;
9709 break;
9710 }
9711 }
9712
9713 if (!FoundCandidate)
9714 return SDValue();
9715
9716 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9717 // optionally with VECSHL if shift is required.
9718 if (Swap)
9719 std::swap(V1, V2);
9720 if (V2.isUndef())
9721 V2 = V1;
9722 if (ShiftElts) {
9723 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9724 DAG.getConstant(ShiftElts, dl, MVT::i32));
9725 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9726 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9727 }
9728 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9729 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9730}
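// As one concrete instance of the search above: on a little-endian target
// the mask {16,1,2,...,15} inserts element 0 of V2 at byte 0 of V1. The
// tables give ShiftElts = LittleEndianShifts[0] = 8, Swap stays false since
// CurrentElement (16) is not < 16, and InsertAtByte = 16 - (0 + 1) = 15.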
9731
9732/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9733/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9734/// SDValue.
9735SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9736 SelectionDAG &DAG) const {
9737 const unsigned NumHalfWords = 8;
9738 const unsigned BytesInVector = NumHalfWords * 2;
9739 // Check that the shuffle is on half-words.
9740 if (!isNByteElemShuffleMask(N, 2, 1))
9741 return SDValue();
9742
9743 bool IsLE = Subtarget.isLittleEndian();
9744 SDLoc dl(N);
9745 SDValue V1 = N->getOperand(0);
9746 SDValue V2 = N->getOperand(1);
9747 unsigned ShiftElts = 0, InsertAtByte = 0;
9748 bool Swap = false;
9749
9750 // Shifts required to get the half-word we want at element 3.
9751 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9752 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9753
9754 uint32_t Mask = 0;
9755 uint32_t OriginalOrderLow = 0x1234567;
9756 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9757 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9758 // 32-bit space, only need 4-bit nibbles per element.
9759 for (unsigned i = 0; i < NumHalfWords; ++i) {
9760 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9761 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9762 }
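// For example, the identity byte mask {0,1,...,15} packs halfword indices
// {0,1,2,3,4,5,6,7} into Mask = 0x01234567, i.e. OriginalOrderLow as
// defined above, while a mask drawing every element from V2 packs to
// 0x89ABCDEF, i.e. OriginalOrderHigh.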
9763
9764 // For each mask element, find out if we're just inserting something
9765 // from V2 into V1 or vice versa. Possible permutations inserting an element
9766 // from V2 into V1:
9767 // X, 1, 2, 3, 4, 5, 6, 7
9768 // 0, X, 2, 3, 4, 5, 6, 7
9769 // 0, 1, X, 3, 4, 5, 6, 7
9770 // 0, 1, 2, X, 4, 5, 6, 7
9771 // 0, 1, 2, 3, X, 5, 6, 7
9772 // 0, 1, 2, 3, 4, X, 6, 7
9773 // 0, 1, 2, 3, 4, 5, X, 7
9774 // 0, 1, 2, 3, 4, 5, 6, X
9775 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9776
9777 bool FoundCandidate = false;
9778 // Go through the mask of half-words to find an element that's being moved
9779 // from one vector to the other.
9780 for (unsigned i = 0; i < NumHalfWords; ++i) {
9781 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9782 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9783 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9784 uint32_t TargetOrder = 0x0;
9785
9786 // If both vector operands for the shuffle are the same vector, the mask
9787 // will contain only elements from the first one and the second one will be
9788 // undef.
9789 if (V2.isUndef()) {
9790 ShiftElts = 0;
9791 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9792 TargetOrder = OriginalOrderLow;
9793 Swap = false;
9794 // Skip if not the correct element or mask of other elements don't equal
9795 // to our expected order.
9796 if (MaskOneElt == VINSERTHSrcElem &&
9797 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9798 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9799 FoundCandidate = true;
9800 break;
9801 }
9802 } else { // If both operands are defined.
9803 // Target order is [8,15] if the current mask is between [0,7].
9804 TargetOrder =
9805 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9806 // Skip if mask of other elements don't equal our expected order.
9807 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9808 // We only need the last 3 bits for the number of shifts.
9809 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9810 : BigEndianShifts[MaskOneElt & 0x7];
9811 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9812 Swap = MaskOneElt < NumHalfWords;
9813 FoundCandidate = true;
9814 break;
9815 }
9816 }
9817 }
9818
9819 if (!FoundCandidate)
9820 return SDValue();
9821
9822 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9823 // optionally with VECSHL if shift is required.
9824 if (Swap)
9825 std::swap(V1, V2);
9826 if (V2.isUndef())
9827 V2 = V1;
9828 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9829 if (ShiftElts) {
9830 // Double ShiftElts because we're left shifting on v16i8 type.
9831 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9832 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9833 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9834 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9835 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9836 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9837 }
9838 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9839 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9840 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9841 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9842}
9843
9844/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9845/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9846/// return the default SDValue.
9847SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9848 SelectionDAG &DAG) const {
9849 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9850 // to v16i8. Peek through the bitcasts to get the actual operands.
9851 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9852 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9853
9854 auto ShuffleMask = SVN->getMask();
9855 SDValue VecShuffle(SVN, 0);
9856 SDLoc DL(SVN);
9857
9858 // Check that we have a four byte shuffle.
9859 if (!isNByteElemShuffleMask(SVN, 4, 1))
9860 return SDValue();
9861
9862 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9863 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9864 std::swap(LHS, RHS);
9865 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9866 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
9867 if (!CommutedSV)
9868 return SDValue();
9869 ShuffleMask = CommutedSV->getMask();
9870 }
9871
9872 // Ensure that the RHS is a vector of constants.
9873 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9874 if (!BVN)
9875 return SDValue();
9876
9877 // Check if RHS is a splat of 4-bytes (or smaller).
9878 APInt APSplatValue, APSplatUndef;
9879 unsigned SplatBitSize;
9880 bool HasAnyUndefs;
9881 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9882 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9883 SplatBitSize > 32)
9884 return SDValue();
9885
9886 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9887 // The instruction splats a constant C into two words of the source vector
9888 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9889 // Thus we check that the shuffle mask is the equivalent of
9890 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9891 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9892 // within each word are consecutive, so we only need to check the first byte.
9893 SDValue Index;
9894 bool IsLE = Subtarget.isLittleEndian();
9895 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9896 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9897 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9898 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9899 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9900 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9901 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9902 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9903 else
9904 return SDValue();
9905
9906 // If the splat is narrower than 32-bits, we need to get the 32-bit value
9907 // for XXSPLTI32DX.
9908 unsigned SplatVal = APSplatValue.getZExtValue();
9909 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9910 SplatVal |= (SplatVal << SplatBitSize);
9911
9911
9912 SDValue SplatNode = DAG.getNode(
9913 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9914 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9915 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9916}
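// For example, a one-byte splat of 0xAB is widened by the loop above to
// 0xABAB after the first iteration and to 0xABABABAB after the second,
// which is the 32-bit value that XXSPLTI32DX ultimately encodes.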
9917
9918/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9919/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9920/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9921/// i.e (or (shl x, C1), (srl x, 128-C1)).
9922SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9923 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9924 assert(Op.getValueType() == MVT::v1i128 &&
9925 "Only set v1i128 as custom, other type shouldn't reach here!");
9926 SDLoc dl(Op);
9927 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9928 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9929 unsigned SHLAmt = N1.getConstantOperandVal(0);
9930 if (SHLAmt % 8 == 0) {
9931 std::array<int, 16> Mask;
9932 std::iota(Mask.begin(), Mask.end(), 0);
9933 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9934 if (SDValue Shuffle =
9935 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9936 DAG.getUNDEF(MVT::v16i8), Mask))
9937 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9938 }
9939 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9940 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9941 DAG.getConstant(SHLAmt, dl, MVT::i32));
9942 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9943 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9944 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9945 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9946}
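// For example, a v1i128 rotate-left by 16 takes the shuffle path above with
// Mask = {2,3,...,15,0,1} (iota rotated by 16 / 8 = 2 byte positions),
// while a rotate by an amount that is not a byte multiple falls back to
// (x << C1) | (x >> (128 - C1)) on i128.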
9947
9948/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9949/// is a shuffle we can handle in a single instruction, return it. Otherwise,
9950/// return the code it can be lowered into. Worst case, it can always be
9951/// lowered into a vperm.
9952SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9953 SelectionDAG &DAG) const {
9954 SDLoc dl(Op);
9955 SDValue V1 = Op.getOperand(0);
9956 SDValue V2 = Op.getOperand(1);
9957 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9958
9959 // Any nodes that were combined in the target-independent combiner prior
9960 // to vector legalization will not be sent to the target combine. Try to
9961 // combine it here.
9962 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9963 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9964 return NewShuffle;
9965 Op = NewShuffle;
9966 SVOp = cast<ShuffleVectorSDNode>(Op);
9967 V1 = Op.getOperand(0);
9968 V2 = Op.getOperand(1);
9969 }
9970 EVT VT = Op.getValueType();
9971 bool isLittleEndian = Subtarget.isLittleEndian();
9972
9973 unsigned ShiftElts, InsertAtByte;
9974 bool Swap = false;
9975
9976 // If this is a load-and-splat, we can do that with a single instruction
9977 // in some cases. However if the load has multiple uses, we don't want to
9978 // combine it because that will just produce multiple loads.
9979 bool IsPermutedLoad = false;
9980 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9981 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9982 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9983 InputLoad->hasOneUse()) {
9984 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9985 int SplatIdx =
9986 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9987
9988 // The splat index for permuted loads will be in the left half of the vector
9989 // which is strictly wider than the loaded value by 8 bytes. So we need to
9990 // adjust the splat index to point to the correct address in memory.
9991 if (IsPermutedLoad) {
9992 assert((isLittleEndian || IsFourByte) &&
9993 "Unexpected size for permuted load on big endian target");
9994 SplatIdx += IsFourByte ? 2 : 1;
9995 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9996 "Splat of a value outside of the loaded memory");
9997 }
9998
9999 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10000 // For 4-byte load-and-splat, we need Power9.
10001 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10002 uint64_t Offset = 0;
10003 if (IsFourByte)
10004 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10005 else
10006 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10007
10008 // If the width of the load is the same as the width of the splat,
10009 // loading with an offset would load the wrong memory.
10010 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10011 Offset = 0;
10012
10013 SDValue BasePtr = LD->getBasePtr();
10014 if (Offset != 0)
10015 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10016 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10017 SDValue Ops[] = {
10018 LD->getChain(), // Chain
10019 BasePtr, // BasePtr
10020 DAG.getValueType(Op.getValueType()) // VT
10021 };
10022 SDVTList VTL =
10023 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10024 SDValue LdSplt =
10025 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10026 Ops, LD->getMemoryVT(), LD->getMemOperand());
10027 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10028 if (LdSplt.getValueType() != SVOp->getValueType(0))
10029 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10030 return LdSplt;
10031 }
10032 }
10033
10034 // All v2i64 and v2f64 shuffles are legal
10035 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10036 return Op;
10037
10038 if (Subtarget.hasP9Vector() &&
10039 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10040 isLittleEndian)) {
10041 if (Swap)
10042 std::swap(V1, V2);
10043 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10044 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10045 if (ShiftElts) {
10046 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10047 DAG.getConstant(ShiftElts, dl, MVT::i32));
10048 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10049 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10050 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10051 }
10052 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10053 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10054 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10055 }
10056
10057 if (Subtarget.hasPrefixInstrs()) {
10058 SDValue SplatInsertNode;
10059 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10060 return SplatInsertNode;
10061 }
10062
10063 if (Subtarget.hasP9Altivec()) {
10064 SDValue NewISDNode;
10065 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10066 return NewISDNode;
10067
10068 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10069 return NewISDNode;
10070 }
10071
10072 if (Subtarget.hasVSX() &&
10073 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10074 if (Swap)
10075 std::swap(V1, V2);
10076 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10077 SDValue Conv2 =
10078 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10079
10080 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10081 DAG.getConstant(ShiftElts, dl, MVT::i32));
10082 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10083 }
10084
10085 if (Subtarget.hasVSX() &&
10086 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10087 if (Swap)
10088 std::swap(V1, V2);
10089 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10090 SDValue Conv2 =
10091 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10092
10093 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10094 DAG.getConstant(ShiftElts, dl, MVT::i32));
10095 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10096 }
10097
10098 if (Subtarget.hasP9Vector()) {
10099 if (PPC::isXXBRHShuffleMask(SVOp)) {
10100 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10101 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10102 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10103 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10104 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10105 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10106 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10107 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10108 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10109 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10110 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10111 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10112 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10113 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10114 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10115 }
10116 }
10117
10118 if (Subtarget.hasVSX()) {
10119 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10120 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10121
10122 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10123 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10124 DAG.getConstant(SplatIdx, dl, MVT::i32));
10125 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10126 }
10127
10128 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10129 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10130 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10131 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10132 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10133 }
10134 }
10135
10136 // Cases that are handled by instructions that take permute immediates
10137 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10138 // selected by the instruction selector.
10139 if (V2.isUndef()) {
10140 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10141 PPC::isSplatShuffleMask(SVOp, 2) ||
10142 PPC::isSplatShuffleMask(SVOp, 4) ||
10143 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10144 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10145 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10146 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10147 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10148 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10149 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10150 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10151 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10152 (Subtarget.hasP8Altivec() && (
10153 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10154 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10155 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10156 return Op;
10157 }
10158 }
10159
10160 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10161 // and produce a fixed permutation. If any of these match, do not lower to
10162 // VPERM.
10163 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
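// ShuffleKind follows the convention of the PPC::is*ShuffleMask helpers:
// 0 = big-endian shuffle of two distinct inputs, 1 = shuffle of two
// identical inputs (either endianness), 2 = little-endian shuffle of two
// distinct inputs.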
10164 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10165 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10166 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10167 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10168 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10169 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10170 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10171 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10172 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10173 (Subtarget.hasP8Altivec() && (
10174 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10175 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10176 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10177 return Op;
10178
10179 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10180 // perfect shuffle table to emit an optimal matching sequence.
10181 ArrayRef<int> PermMask = SVOp->getMask();
10182
10183 if (!DisablePerfectShuffle && !isLittleEndian) {
10184 unsigned PFIndexes[4];
10185 bool isFourElementShuffle = true;
10186 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10187 ++i) { // Element number
10188 unsigned EltNo = 8; // Start out undef.
10189 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10190 if (PermMask[i * 4 + j] < 0)
10191 continue; // Undef, ignore it.
10192
10193 unsigned ByteSource = PermMask[i * 4 + j];
10194 if ((ByteSource & 3) != j) {
10195 isFourElementShuffle = false;
10196 break;
10197 }
10198
10199 if (EltNo == 8) {
10200 EltNo = ByteSource / 4;
10201 } else if (EltNo != ByteSource / 4) {
10202 isFourElementShuffle = false;
10203 break;
10204 }
10205 }
10206 PFIndexes[i] = EltNo;
10207 }
10208
10209 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10210 // perfect shuffle vector to determine if it is cost effective to do this as
10211 // discrete instructions, or whether we should use a vperm.
10212 // For now, we skip this for little endian until such time as we have a
10213 // little-endian perfect shuffle table.
10214 if (isFourElementShuffle) {
10215 // Compute the index in the perfect shuffle table.
10216 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10217 PFIndexes[2] * 9 + PFIndexes[3];
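// Each PFIndexes[i] is a source element number in [0, 8] (8 == undef), so
// the four indices pack into a single base-9 value; e.g. indices
// {1, 0, 3, 2} give 1*729 + 0*81 + 3*9 + 2 == 758.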
10218
10219 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10220 unsigned Cost = (PFEntry >> 30);
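// A PerfectShuffleTable entry (see PPCPerfectShuffle.h) stores the length
// of the instruction sequence in its top two bits; GeneratePerfectShuffle
// decodes the remaining bits (the opcode and the two operand sub-shuffle
// IDs) when the sequence is actually emitted.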
10221
10222 // Determining when to avoid vperm is tricky. Many things affect the cost
10223 // of vperm, particularly how many times the perm mask needs to be
10224 // computed. For example, if the perm mask can be hoisted out of a loop or
10225 // is already used (perhaps because there are multiple permutes with the
10226 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10227 // permute mask out of the loop requires an extra register.
10228 //
10229 // As a compromise, we only emit discrete instructions if the shuffle can
10230 // be generated in 3 or fewer operations. When we have loop information
10231 // available, if this block is within a loop, we should avoid using vperm
10232 // for 3-operation perms and use a constant pool load instead.
10233 if (Cost < 3)
10234 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10235 }
10236 }
10237
10238 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10239 // vector that will get spilled to the constant pool.
10240 if (V2.isUndef()) V2 = V1;
10241
10242 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10243}
10244
10245SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10246 ArrayRef<int> PermMask, EVT VT,
10247 SDValue V1, SDValue V2) const {
10248 unsigned Opcode = PPCISD::VPERM;
10249 EVT ValType = V1.getValueType();
10250 SDLoc dl(Op);
10251 bool NeedSwap = false;
10252 bool isLittleEndian = Subtarget.isLittleEndian();
10253 bool isPPC64 = Subtarget.isPPC64();
10254
10255 // In little-endian mode we only need to swap the two inputs;
10256 // the permute mask below is computed accordingly.
10257 if (isLittleEndian)
10258 std::swap(V1, V2);
10259
10260 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10261 (V1->hasOneUse() || V2->hasOneUse())) {
10262 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10263 "XXPERM instead\n");
10264 Opcode = PPCISD::XXPERM;
10265
10266 // The second input to XXPERM is also an output, so if the second input has
10267 // multiple uses a copy would be required; we therefore want the single-use
10268 // operand to be the second input, preventing that copy.
10269 if (!V2->hasOneUse() && V1->hasOneUse()) {
10270 std::swap(V1, V2);
10271 NeedSwap = !NeedSwap;
10272 }
10273 }
10274
10275 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10276 // that it is in input element units, not in bytes. Convert now.
10277
10278 // For little endian, the order of the input vectors is reversed, and
10279 // the permutation mask is complemented with respect to 31. This is
10280 // necessary to produce proper semantics with the big-endian-based vperm
10281 // instruction.
10282 EVT EltVT = V1.getValueType().getVectorElementType();
10283 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10284
10285 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10286 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10287
10288 /*
10289 Vectors will be appended like so: [ V1 | v2 ]
10290 XXSWAPD on V1:
10291 [ A | B | C | D ] -> [ C | D | A | B ]
10292 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10293 i.e. index of A, B += 8, and index of C, D -= 8.
10294 XXSWAPD on V2:
10295 [ E | F | G | H ] -> [ G | H | E | F ]
10296 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10297 i.e. index of E, F += 8, index of G, H -= 8
10298 Swap V1 and V2:
10299 [ V1 | V2 ] -> [ V2 | V1 ]
10300 0-15 16-31 0-15 16-31
10301 i.e. index of V1 += 16, index of V2 -= 16
10302 */
10303
10304 SmallVector<SDValue, 16> ResultMask;
10305 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10306 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10307
10308 if (Opcode == PPCISD::XXPERM) {
10309 if (V1HasXXSWAPD) {
10310 if (SrcElt < 8)
10311 SrcElt += 8;
10312 else if (SrcElt < 16)
10313 SrcElt -= 8;
10314 }
10315 if (V2HasXXSWAPD) {
10316 if (SrcElt > 23)
10317 SrcElt -= 8;
10318 else if (SrcElt > 15)
10319 SrcElt += 8;
10320 }
10321 if (NeedSwap) {
10322 if (SrcElt < 16)
10323 SrcElt += 16;
10324 else
10325 SrcElt -= 16;
10326 }
10327 }
10328
10329 for (unsigned j = 0; j != BytesPerElement; ++j)
10330 if (isLittleEndian)
10331 ResultMask.push_back(
10332 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10333 else
10334 ResultMask.push_back(
10335 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10336 }
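// On little endian, control entry 31 - k mirrors byte index k through the
// 32-byte concatenation of the two inputs; combined with the V1/V2 swap
// above this reproduces the semantics of the big-endian vperm instruction.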
10337
10338 if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10339 if (V1HasXXSWAPD) {
10340 dl = SDLoc(V1->getOperand(0));
10341 V1 = V1->getOperand(0)->getOperand(1);
10342 }
10343 if (V2HasXXSWAPD) {
10344 dl = SDLoc(V2->getOperand(0));
10345 V2 = V2->getOperand(0)->getOperand(1);
10346 }
10347 if (isPPC64 && ValType != MVT::v2f64)
10348 V1 = DAG.getBitcast(MVT::v2f64, V1);
10349 if (isPPC64 && V2.getValueType() != MVT::v2f64)
10350 V2 = DAG.getBitcast(MVT::v2f64, V2);
10351 }
10352
10353 ShufflesHandledWithVPERM++;
10354 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10355 LLVM_DEBUG({
10356 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10357 if (Opcode == PPCISD::XXPERM) {
10358 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10359 } else {
10360 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10361 }
10362 SVOp->dump();
10363 dbgs() << "With the following permute control vector:\n";
10364 VPermMask.dump();
10365 });
10366
10367 if (Opcode == PPCISD::XXPERM)
10368 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10369
10370 SDValue VPERMNode =
10371 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10372
10373 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10374 return VPERMNode;
10375}
10376
10377/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10378 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10379/// information about the intrinsic.
10380 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10381 bool &isDot, const PPCSubtarget &Subtarget) {
10382 unsigned IntrinsicID =
10383 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10384 CompareOpc = -1;
10385 isDot = false;
10386 switch (IntrinsicID) {
10387 default:
10388 return false;
10389 // Comparison predicates.
10390 case Intrinsic::ppc_altivec_vcmpbfp_p:
10391 CompareOpc = 966;
10392 isDot = true;
10393 break;
10394 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10395 CompareOpc = 198;
10396 isDot = true;
10397 break;
10398 case Intrinsic::ppc_altivec_vcmpequb_p:
10399 CompareOpc = 6;
10400 isDot = true;
10401 break;
10402 case Intrinsic::ppc_altivec_vcmpequh_p:
10403 CompareOpc = 70;
10404 isDot = true;
10405 break;
10406 case Intrinsic::ppc_altivec_vcmpequw_p:
10407 CompareOpc = 134;
10408 isDot = true;
10409 break;
10410 case Intrinsic::ppc_altivec_vcmpequd_p:
10411 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10412 CompareOpc = 199;
10413 isDot = true;
10414 } else
10415 return false;
10416 break;
10417 case Intrinsic::ppc_altivec_vcmpneb_p:
10418 case Intrinsic::ppc_altivec_vcmpneh_p:
10419 case Intrinsic::ppc_altivec_vcmpnew_p:
10420 case Intrinsic::ppc_altivec_vcmpnezb_p:
10421 case Intrinsic::ppc_altivec_vcmpnezh_p:
10422 case Intrinsic::ppc_altivec_vcmpnezw_p:
10423 if (Subtarget.hasP9Altivec()) {
10424 switch (IntrinsicID) {
10425 default:
10426 llvm_unreachable("Unknown comparison intrinsic.");
10427 case Intrinsic::ppc_altivec_vcmpneb_p:
10428 CompareOpc = 7;
10429 break;
10430 case Intrinsic::ppc_altivec_vcmpneh_p:
10431 CompareOpc = 71;
10432 break;
10433 case Intrinsic::ppc_altivec_vcmpnew_p:
10434 CompareOpc = 135;
10435 break;
10436 case Intrinsic::ppc_altivec_vcmpnezb_p:
10437 CompareOpc = 263;
10438 break;
10439 case Intrinsic::ppc_altivec_vcmpnezh_p:
10440 CompareOpc = 327;
10441 break;
10442 case Intrinsic::ppc_altivec_vcmpnezw_p:
10443 CompareOpc = 391;
10444 break;
10445 }
10446 isDot = true;
10447 } else
10448 return false;
10449 break;
10450 case Intrinsic::ppc_altivec_vcmpgefp_p:
10451 CompareOpc = 454;
10452 isDot = true;
10453 break;
10454 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10455 CompareOpc = 710;
10456 isDot = true;
10457 break;
10458 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10459 CompareOpc = 774;
10460 isDot = true;
10461 break;
10462 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10463 CompareOpc = 838;
10464 isDot = true;
10465 break;
10466 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10467 CompareOpc = 902;
10468 isDot = true;
10469 break;
10470 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10471 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10472 CompareOpc = 967;
10473 isDot = true;
10474 } else
10475 return false;
10476 break;
10477 case Intrinsic::ppc_altivec_vcmpgtub_p:
10478 CompareOpc = 518;
10479 isDot = true;
10480 break;
10481 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10482 CompareOpc = 582;
10483 isDot = true;
10484 break;
10485 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10486 CompareOpc = 646;
10487 isDot = true;
10488 break;
10489 case Intrinsic::ppc_altivec_vcmpgtud_p:
10490 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10491 CompareOpc = 711;
10492 isDot = true;
10493 } else
10494 return false;
10495 break;
10496
10497 case Intrinsic::ppc_altivec_vcmpequq:
10498 case Intrinsic::ppc_altivec_vcmpgtsq:
10499 case Intrinsic::ppc_altivec_vcmpgtuq:
10500 if (!Subtarget.isISA3_1())
10501 return false;
10502 switch (IntrinsicID) {
10503 default:
10504 llvm_unreachable("Unknown comparison intrinsic.");
10505 case Intrinsic::ppc_altivec_vcmpequq:
10506 CompareOpc = 455;
10507 break;
10508 case Intrinsic::ppc_altivec_vcmpgtsq:
10509 CompareOpc = 903;
10510 break;
10511 case Intrinsic::ppc_altivec_vcmpgtuq:
10512 CompareOpc = 647;
10513 break;
10514 }
10515 break;
10516
10517 // VSX predicate comparisons use the same infrastructure
10518 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10519 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10520 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10521 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10522 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10523 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10524 if (Subtarget.hasVSX()) {
10525 switch (IntrinsicID) {
10526 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10527 CompareOpc = 99;
10528 break;
10529 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10530 CompareOpc = 115;
10531 break;
10532 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10533 CompareOpc = 107;
10534 break;
10535 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10536 CompareOpc = 67;
10537 break;
10538 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10539 CompareOpc = 83;
10540 break;
10541 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10542 CompareOpc = 75;
10543 break;
10544 }
10545 isDot = true;
10546 } else
10547 return false;
10548 break;
10549
10550 // Normal Comparisons.
10551 case Intrinsic::ppc_altivec_vcmpbfp:
10552 CompareOpc = 966;
10553 break;
10554 case Intrinsic::ppc_altivec_vcmpeqfp:
10555 CompareOpc = 198;
10556 break;
10557 case Intrinsic::ppc_altivec_vcmpequb:
10558 CompareOpc = 6;
10559 break;
10560 case Intrinsic::ppc_altivec_vcmpequh:
10561 CompareOpc = 70;
10562 break;
10563 case Intrinsic::ppc_altivec_vcmpequw:
10564 CompareOpc = 134;
10565 break;
10566 case Intrinsic::ppc_altivec_vcmpequd:
10567 if (Subtarget.hasP8Altivec())
10568 CompareOpc = 199;
10569 else
10570 return false;
10571 break;
10572 case Intrinsic::ppc_altivec_vcmpneb:
10573 case Intrinsic::ppc_altivec_vcmpneh:
10574 case Intrinsic::ppc_altivec_vcmpnew:
10575 case Intrinsic::ppc_altivec_vcmpnezb:
10576 case Intrinsic::ppc_altivec_vcmpnezh:
10577 case Intrinsic::ppc_altivec_vcmpnezw:
10578 if (Subtarget.hasP9Altivec())
10579 switch (IntrinsicID) {
10580 default:
10581 llvm_unreachable("Unknown comparison intrinsic.");
10582 case Intrinsic::ppc_altivec_vcmpneb:
10583 CompareOpc = 7;
10584 break;
10585 case Intrinsic::ppc_altivec_vcmpneh:
10586 CompareOpc = 71;
10587 break;
10588 case Intrinsic::ppc_altivec_vcmpnew:
10589 CompareOpc = 135;
10590 break;
10591 case Intrinsic::ppc_altivec_vcmpnezb:
10592 CompareOpc = 263;
10593 break;
10594 case Intrinsic::ppc_altivec_vcmpnezh:
10595 CompareOpc = 327;
10596 break;
10597 case Intrinsic::ppc_altivec_vcmpnezw:
10598 CompareOpc = 391;
10599 break;
10600 }
10601 else
10602 return false;
10603 break;
10604 case Intrinsic::ppc_altivec_vcmpgefp:
10605 CompareOpc = 454;
10606 break;
10607 case Intrinsic::ppc_altivec_vcmpgtfp:
10608 CompareOpc = 710;
10609 break;
10610 case Intrinsic::ppc_altivec_vcmpgtsb:
10611 CompareOpc = 774;
10612 break;
10613 case Intrinsic::ppc_altivec_vcmpgtsh:
10614 CompareOpc = 838;
10615 break;
10616 case Intrinsic::ppc_altivec_vcmpgtsw:
10617 CompareOpc = 902;
10618 break;
10619 case Intrinsic::ppc_altivec_vcmpgtsd:
10620 if (Subtarget.hasP8Altivec())
10621 CompareOpc = 967;
10622 else
10623 return false;
10624 break;
10625 case Intrinsic::ppc_altivec_vcmpgtub:
10626 CompareOpc = 518;
10627 break;
10628 case Intrinsic::ppc_altivec_vcmpgtuh:
10629 CompareOpc = 582;
10630 break;
10631 case Intrinsic::ppc_altivec_vcmpgtuw:
10632 CompareOpc = 646;
10633 break;
10634 case Intrinsic::ppc_altivec_vcmpgtud:
10635 if (Subtarget.hasP8Altivec())
10636 CompareOpc = 711;
10637 else
10638 return false;
10639 break;
10640 case Intrinsic::ppc_altivec_vcmpequq_p:
10641 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10642 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10643 if (!Subtarget.isISA3_1())
10644 return false;
10645 switch (IntrinsicID) {
10646 default:
10647 llvm_unreachable("Unknown comparison intrinsic.");
10648 case Intrinsic::ppc_altivec_vcmpequq_p:
10649 CompareOpc = 455;
10650 break;
10651 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10652 CompareOpc = 903;
10653 break;
10654 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10655 CompareOpc = 647;
10656 break;
10657 }
10658 isDot = true;
10659 break;
10660 }
10661 return true;
10662}
10663
10664/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10665/// lower, do it, otherwise return null.
10666SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10667 SelectionDAG &DAG) const {
10668 unsigned IntrinsicID =
10669 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10670
10671 SDLoc dl(Op);
10672
10673 switch (IntrinsicID) {
10674 case Intrinsic::thread_pointer:
10675 // Reads the thread pointer register, used for __builtin_thread_pointer.
10676 if (Subtarget.isPPC64())
10677 return DAG.getRegister(PPC::X13, MVT::i64);
10678 return DAG.getRegister(PPC::R2, MVT::i32);
10679
10680 case Intrinsic::ppc_mma_disassemble_acc: {
10681 if (Subtarget.isISAFuture()) {
10682 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10683 SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
10684 ArrayRef(ReturnTypes, 2),
10685 Op.getOperand(1)),
10686 0);
10687 SmallVector<SDValue, 4> RetOps;
10688 SDValue Value = SDValue(WideVec.getNode(), 0);
10689 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10690
10691 SDValue Extract;
10692 Extract = DAG.getNode(
10693 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10694 Subtarget.isLittleEndian() ? Value2 : Value,
10695 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10696 dl, getPointerTy(DAG.getDataLayout())));
10697 RetOps.push_back(Extract);
10698 Extract = DAG.getNode(
10699 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10700 Subtarget.isLittleEndian() ? Value2 : Value,
10701 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10702 dl, getPointerTy(DAG.getDataLayout())));
10703 RetOps.push_back(Extract);
10704 Extract = DAG.getNode(
10705 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10706 Subtarget.isLittleEndian() ? Value : Value2,
10707 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10708 dl, getPointerTy(DAG.getDataLayout())));
10709 RetOps.push_back(Extract);
10710 Extract = DAG.getNode(
10711 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10712 Subtarget.isLittleEndian() ? Value : Value2,
10713 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10714 dl, getPointerTy(DAG.getDataLayout())));
10715 RetOps.push_back(Extract);
10716 return DAG.getMergeValues(RetOps, dl);
10717 }
10718 [[fallthrough]];
10719 }
10720 case Intrinsic::ppc_vsx_disassemble_pair: {
10721 int NumVecs = 2;
10722 SDValue WideVec = Op.getOperand(1);
10723 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10724 NumVecs = 4;
10725 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10726 }
10727 SmallVector<SDValue, 4> RetOps;
10728 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10729 SDValue Extract = DAG.getNode(
10730 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10731 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10732 : VecNo,
10733 dl, getPointerTy(DAG.getDataLayout())));
10734 RetOps.push_back(Extract);
10735 }
10736 return DAG.getMergeValues(RetOps, dl);
10737 }
10738
10739 case Intrinsic::ppc_mma_xxmfacc:
10740 case Intrinsic::ppc_mma_xxmtacc: {
10741 // Allow pre-isa-future subtargets to lower as normal.
10742 if (!Subtarget.isISAFuture())
10743 return SDValue();
10744 // The intrinsics for xxmtacc and xxmfacc take one argument of
10745 // type v512i1. For future CPUs, the corresponding wacc instruction
10746 // dmxx[inst|extf]dmr512 is always generated for type v512i1, removing
10747 // the need to produce the xxm[t|f]acc.
10748 SDValue WideVec = Op.getOperand(1);
10749 DAG.ReplaceAllUsesWith(Op, WideVec);
10750 return SDValue();
10751 }
10752
10753 case Intrinsic::ppc_unpack_longdouble: {
10754 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10755 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10756 "Argument of long double unpack must be 0 or 1!");
10757 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10758 DAG.getConstant(!!(Idx->getSExtValue()), dl,
10759 Idx->getValueType(0)));
10760 }
10761
10762 case Intrinsic::ppc_compare_exp_lt:
10763 case Intrinsic::ppc_compare_exp_gt:
10764 case Intrinsic::ppc_compare_exp_eq:
10765 case Intrinsic::ppc_compare_exp_uo: {
10766 unsigned Pred;
10767 switch (IntrinsicID) {
10768 case Intrinsic::ppc_compare_exp_lt:
10769 Pred = PPC::PRED_LT;
10770 break;
10771 case Intrinsic::ppc_compare_exp_gt:
10772 Pred = PPC::PRED_GT;
10773 break;
10774 case Intrinsic::ppc_compare_exp_eq:
10775 Pred = PPC::PRED_EQ;
10776 break;
10777 case Intrinsic::ppc_compare_exp_uo:
10778 Pred = PPC::PRED_UN;
10779 break;
10780 }
10781 return SDValue(
10782 DAG.getMachineNode(
10783 PPC::SELECT_CC_I4, dl, MVT::i32,
10784 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10785 Op.getOperand(1), Op.getOperand(2)),
10786 0),
10787 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10788 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10789 0);
10790 }
10791 case Intrinsic::ppc_test_data_class: {
10792 EVT OpVT = Op.getOperand(1).getValueType();
10793 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
10794 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
10795 : PPC::XSTSTDCSP);
10796 return SDValue(
10797 DAG.getMachineNode(
10798 PPC::SELECT_CC_I4, dl, MVT::i32,
10799 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10800 Op.getOperand(1)),
10801 0),
10802 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10803 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10804 0);
10805 }
10806 case Intrinsic::ppc_fnmsub: {
10807 EVT VT = Op.getOperand(1).getValueType();
10808 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10809 return DAG.getNode(
10810 ISD::FNEG, dl, VT,
10811 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10812 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10813 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10814 Op.getOperand(2), Op.getOperand(3));
10815 }
10816 case Intrinsic::ppc_convert_f128_to_ppcf128:
10817 case Intrinsic::ppc_convert_ppcf128_to_f128: {
10818 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10819 ? RTLIB::CONVERT_PPCF128_F128
10820 : RTLIB::CONVERT_F128_PPCF128;
10821 MakeLibCallOptions CallOptions;
10822 std::pair<SDValue, SDValue> Result =
10823 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10824 dl, SDValue());
10825 return Result.first;
10826 }
10827 case Intrinsic::ppc_maxfe:
10828 case Intrinsic::ppc_maxfl:
10829 case Intrinsic::ppc_maxfs:
10830 case Intrinsic::ppc_minfe:
10831 case Intrinsic::ppc_minfl:
10832 case Intrinsic::ppc_minfs: {
10833 EVT VT = Op.getValueType();
10834 assert(
10835 all_of(Op->ops().drop_front(4),
10836 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10837 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10838 (void)VT;
10839 ISD::CondCode CC = ISD::SETGT;
10840 if (IntrinsicID == Intrinsic::ppc_minfe ||
10841 IntrinsicID == Intrinsic::ppc_minfl ||
10842 IntrinsicID == Intrinsic::ppc_minfs)
10843 CC = ISD::SETLT;
10844 unsigned I = Op.getNumOperands() - 2, Cnt = I;
10845 SDValue Res = Op.getOperand(I);
10846 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10847 Res =
10848 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10849 }
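// The loop above performs a select-based reduction over the FP arguments:
// Res starts at one operand, and each getSelectCC keeps either the running
// result or the next operand according to CC (SETGT for the max variants,
// SETLT for the min variants).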
10850 return Res;
10851 }
10852 }
10853
10854 // If this is a lowered altivec predicate compare, CompareOpc is set to the
10855 // opcode number of the comparison.
10856 int CompareOpc;
10857 bool isDot;
10858 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10859 return SDValue(); // Don't custom lower most intrinsics.
10860
10861 // If this is a non-dot comparison, make the VCMP node and we are done.
10862 if (!isDot) {
10863 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10864 Op.getOperand(1), Op.getOperand(2),
10865 DAG.getConstant(CompareOpc, dl, MVT::i32));
10866 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10867 }
10868
10869 // Create the PPCISD altivec 'dot' comparison node.
10870 SDValue Ops[] = {
10871 Op.getOperand(2), // LHS
10872 Op.getOperand(3), // RHS
10873 DAG.getConstant(CompareOpc, dl, MVT::i32)
10874 };
10875 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10876 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10877
10878 // Now that we have the comparison, emit a copy from the CR to a GPR.
10879 // This is flagged to the above dot comparison.
10880 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10881 DAG.getRegister(PPC::CR6, MVT::i32),
10882 CompNode.getValue(1));
10883
10884 // Unpack the result based on how the target uses it.
10885 unsigned BitNo; // Bit # of CR6.
10886 bool InvertBit; // Invert result?
10887 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10888 default: // Can't happen, don't crash on invalid number though.
10889 case 0: // Return the value of the EQ bit of CR6.
10890 BitNo = 0; InvertBit = false;
10891 break;
10892 case 1: // Return the inverted value of the EQ bit of CR6.
10893 BitNo = 0; InvertBit = true;
10894 break;
10895 case 2: // Return the value of the LT bit of CR6.
10896 BitNo = 2; InvertBit = false;
10897 break;
10898 case 3: // Return the inverted value of the LT bit of CR6.
10899 BitNo = 2; InvertBit = true;
10900 break;
10901 }
10902
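// After MFOCRF, the four CR6 bits occupy bits 7 (LT) down to 4 (SO) of the
// 32-bit result, so the shift amount 8 - (3 - BitNo), i.e. 5 + BitNo,
// moves the requested bit down to bit 0.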
10903 // Shift the bit into the low position.
10904 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10905 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10906 // Isolate the bit.
10907 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10908 DAG.getConstant(1, dl, MVT::i32));
10909
10910 // If we are supposed to, toggle the bit.
10911 if (InvertBit)
10912 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10913 DAG.getConstant(1, dl, MVT::i32));
10914 return Flags;
10915}
10916
10917SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10918 SelectionDAG &DAG) const {
10919 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10920 // the beginning of the argument list.
10921 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10922 SDLoc DL(Op);
10923 switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10924 case Intrinsic::ppc_cfence: {
10925 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10926 assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10927 SDValue Val = Op.getOperand(ArgStart + 1);
10928 EVT Ty = Val.getValueType();
10929 if (Ty == MVT::i128) {
10930 // FIXME: Testing one of two paired registers is sufficient to guarantee
10931 // ordering?
10932 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10933 }
10934 return SDValue(
10935 DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10936 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10937 Op.getOperand(0)),
10938 0);
10939 }
10940 default:
10941 break;
10942 }
10943 return SDValue();
10944}
10945
10946// Lower scalar BSWAP64 to xxbrd.
10947SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10948 SDLoc dl(Op);
10949 if (!Subtarget.isPPC64())
10950 return Op;
10951 // MTVSRDD
10952 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10953 Op.getOperand(0));
10954 // XXBRD
10955 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10956 // MFVSRD
10957 int VectorIndex = 0;
10958 if (Subtarget.isLittleEndian())
10959 VectorIndex = 1;
10960 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10961 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10962 return Op;
10963}
10964
10965// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10966// compared to a value that is atomically loaded (atomic loads zero-extend).
10967SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10968 SelectionDAG &DAG) const {
10969 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10970 "Expecting an atomic compare-and-swap here.");
10971 SDLoc dl(Op);
10972 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10973 EVT MemVT = AtomicNode->getMemoryVT();
10974 if (MemVT.getSizeInBits() >= 32)
10975 return Op;
10976
10977 SDValue CmpOp = Op.getOperand(2);
10978 // If this is already correctly zero-extended, leave it alone.
10979 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10980 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10981 return Op;
10982
10983 // Clear the high bits of the compare operand.
10984 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10985 SDValue NewCmpOp =
10986 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10987 DAG.getConstant(MaskVal, dl, MVT::i32));
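// For an i8 compare-and-swap, MaskVal is (1 << 8) - 1 == 0xFF, so the
// compare operand now matches the zero-extended value the atomic load
// produces.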
10988
10989 // Replace the existing compare operand with the properly zero-extended one.
10990 SmallVector<SDValue, 4> Ops;
10991 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10992 Ops.push_back(AtomicNode->getOperand(i));
10993 Ops[2] = NewCmpOp;
10994 MachineMemOperand *MMO = AtomicNode->getMemOperand();
10995 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10996 auto NodeTy =
10997 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10998 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10999}
11000
11001SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11002 SelectionDAG &DAG) const {
11003 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11004 EVT MemVT = N->getMemoryVT();
11005 assert(MemVT.getSimpleVT() == MVT::i128 &&
11006 "Expect quadword atomic operations");
11007 SDLoc dl(N);
11008 unsigned Opc = N->getOpcode();
11009 switch (Opc) {
11010 case ISD::ATOMIC_LOAD: {
11011 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11012 // lowered to ppc instructions by pattern matching instruction selector.
11013 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11014 SmallVector<SDValue, 4> Ops{
11015 N->getOperand(0),
11016 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11017 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11018 Ops.push_back(N->getOperand(I));
11019 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11020 Ops, MemVT, N->getMemOperand());
11021 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11022 SDValue ValHi =
11023 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11024 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11025 DAG.getConstant(64, dl, MVT::i32));
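// Reassemble the quadword as zext(ValLo) | (zext(ValHi) << 64).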
11026 SDValue Val =
11027 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11028 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11029 {Val, LoadedVal.getValue(2)});
11030 }
11031 case ISD::ATOMIC_STORE: {
11032 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11033 // lowered to ppc instructions by pattern matching instruction selector.
11034 SDVTList Tys = DAG.getVTList(MVT::Other);
11035 SmallVector<SDValue, 4> Ops{
11036 N->getOperand(0),
11037 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11038 SDValue Val = N->getOperand(2);
11039 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11040 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11041 DAG.getConstant(64, dl, MVT::i32));
11042 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11043 Ops.push_back(ValLo);
11044 Ops.push_back(ValHi);
11045 Ops.push_back(N->getOperand(1));
11046 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11047 N->getMemOperand());
11048 }
11049 default:
11050 llvm_unreachable("Unexpected atomic opcode");
11051 }
11052}
11053
11054 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11055 SelectionDAG &DAG,
11056 const PPCSubtarget &Subtarget) {
11057 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11058
11059 enum DataClassMask {
11060 DC_NAN = 1 << 6,
11061 DC_NEG_INF = 1 << 4,
11062 DC_POS_INF = 1 << 5,
11063 DC_NEG_ZERO = 1 << 2,
11064 DC_POS_ZERO = 1 << 3,
11065 DC_NEG_SUBNORM = 1,
11066 DC_POS_SUBNORM = 1 << 1,
11067 };
11068
11069 EVT VT = Op.getValueType();
11070
11071 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11072 : VT == MVT::f64 ? PPC::XSTSTDCDP
11073 : PPC::XSTSTDCSP;
11074
11075 if (Mask == fcAllFlags)
11076 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11077 if (Mask == 0)
11078 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11079
11080 // When it's cheaper or necessary to test reverse flags.
11081 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11082 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11083 return DAG.getNOT(Dl, Rev, MVT::i1);
11084 }
11085
11086 // Power doesn't support testing whether a value is 'normal'. Test the rest
11087 // first, and test if it's 'not not-normal' with expected sign.
11088 if (Mask & fcNormal) {
11089 SDValue Rev(DAG.getMachineNode(
11090 TestOp, Dl, MVT::i32,
11091 DAG.getTargetConstant(DC_NEG_ZERO | DC_POS_ZERO |
11092 DC_NEG_SUBNORM | DC_POS_SUBNORM |
11093 DC_NAN | DC_NEG_INF | DC_POS_INF,
11094 Dl, MVT::i32),
11095 Op),
11096 0);
11097 // The sign is stored in CR bit 0 and the result in CR bit 2.
11098 SDValue Sign(
11099 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11100 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11101 0);
11102 SDValue Normal(DAG.getNOT(
11103 Dl,
11104 SDValue(DAG.getMachineNode(
11105 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11106 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11107 0),
11108 MVT::i1));
11109 if (Mask & fcPosNormal)
11110 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11111 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11112 if (Mask == fcPosNormal || Mask == fcNegNormal)
11113 return Result;
11114
11115 return DAG.getNode(
11116 ISD::OR, Dl, MVT::i1,
11117 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11118 }
11119
11120 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11121 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11122 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11123 bool IsQuiet = Mask & fcQNan;
11124 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11125
11126 // Quietness is determined by the first bit in fraction field.
11127 uint64_t QuietMask = 0;
11128 SDValue HighWord;
11129 if (VT == MVT::f128) {
11130 HighWord = DAG.getNode(
11131 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11132 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11133 QuietMask = 0x8000;
11134 } else if (VT == MVT::f64) {
11135 if (Subtarget.isPPC64()) {
11136 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11137 DAG.getBitcast(MVT::i64, Op),
11138 DAG.getConstant(1, Dl, MVT::i32));
11139 } else {
11140 SDValue Vec = DAG.getBitcast(
11141 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11142 HighWord = DAG.getNode(
11143 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11144 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11145 }
11146 QuietMask = 0x80000;
11147 } else if (VT == MVT::f32) {
11148 HighWord = DAG.getBitcast(MVT::i32, Op);
11149 QuietMask = 0x400000;
11150 }
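// Each mask tests the most-significant fraction bit within the extracted
// high word: bit 22 for f32 (0x400000), bit 51 - 32 == 19 for f64
// (0x80000), and bit 111 - 96 == 15 for f128 (0x8000).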
11151 SDValue NanRes = DAG.getSetCC(
11152 Dl, MVT::i1,
11153 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11154 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11155 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11156 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11157 if (Mask == fcQNan || Mask == fcSNan)
11158 return NanRes;
11159
11160 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11161 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11162 NanRes);
11163 }
11164
11165 unsigned NativeMask = 0;
11166 if ((Mask & fcNan) == fcNan)
11167 NativeMask |= DC_NAN;
11168 if (Mask & fcNegInf)
11169 NativeMask |= DC_NEG_INF;
11170 if (Mask & fcPosInf)
11171 NativeMask |= DC_POS_INF;
11172 if (Mask & fcNegZero)
11173 NativeMask |= DC_NEG_ZERO;
11174 if (Mask & fcPosZero)
11175 NativeMask |= DC_POS_ZERO;
11176 if (Mask & fcNegSubnormal)
11177 NativeMask |= DC_NEG_SUBNORM;
11178 if (Mask & fcPosSubnormal)
11179 NativeMask |= DC_POS_SUBNORM;
11180 return SDValue(
11181 DAG.getMachineNode(
11182 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11183 SDValue(DAG.getMachineNode(
11184 TestOp, Dl, MVT::i32,
11185 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11186 0),
11187 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11188 0);
11189}
11190
11191SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11192 SelectionDAG &DAG) const {
11193 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11194 SDValue LHS = Op.getOperand(0);
11195 const auto *RHS = cast<ConstantSDNode>(Op.getOperand(1));
11196 SDLoc Dl(Op);
11197 FPClassTest Category = static_cast<FPClassTest>(RHS->getZExtValue());
11198 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11199}
11200
11201SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11202 SelectionDAG &DAG) const {
11203 SDLoc dl(Op);
11204 // Create a stack slot that is 16-byte aligned.
11205 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11206 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11207 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11208 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11209
11210 // Store the input value into Value#0 of the stack slot.
11211 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11212 MachinePointerInfo());
11213 // Load it out.
11214 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11215}
11216
11217SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11218 SelectionDAG &DAG) const {
11219 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11220 "Should only be called for ISD::INSERT_VECTOR_ELT");
11221
11222 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11223
11224 EVT VT = Op.getValueType();
11225 SDLoc dl(Op);
11226 SDValue V1 = Op.getOperand(0);
11227 SDValue V2 = Op.getOperand(1);
11228
11229 if (VT == MVT::v2f64 && C)
11230 return Op;
11231
11232 if (Subtarget.hasP9Vector()) {
11233 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11234 // because on P10, it allows this specific insert_vector_elt load pattern to
11235 // utilize the refactored load and store infrastructure in order to exploit
11236 // prefixed loads.
11237 // On targets with inexpensive direct moves (Power9 and up), a
11238 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11239 // load since a single precision load will involve conversion to double
11240 // precision on the load followed by another conversion to single precision.
11241 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11242 (isa<LoadSDNode>(V2))) {
11243 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11244 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11245 SDValue InsVecElt =
11246 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11247 BitcastLoad, Op.getOperand(2));
11248 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11249 }
11250 }
11251
11252 if (Subtarget.isISA3_1()) {
11253 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11254 return SDValue();
11255 // On P10, we have legal lowering for constant and variable indices for
11256 // all vectors.
11257 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11258 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11259 return Op;
11260 }
11261
11262 // Before P10, we have legal lowering for constant indices but not for
11263 // variable ones.
11264 if (!C)
11265 return SDValue();
11266
11267 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11268 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11269 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11270 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11271 unsigned InsertAtElement = C->getZExtValue();
11272 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11273 if (Subtarget.isLittleEndian()) {
11274 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11275 }
11276 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11277 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11278 }
11279 return Op;
11280}
11281
11282SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11283 SelectionDAG &DAG) const {
11284 SDLoc dl(Op);
11285 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11286 SDValue LoadChain = LN->getChain();
11287 SDValue BasePtr = LN->getBasePtr();
11288 EVT VT = Op.getValueType();
11289
11290 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11291 return Op;
11292
11293 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11294 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11295 // 2 or 4 vsx registers.
11296 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11297 "Type unsupported without MMA");
11298 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11299 "Type unsupported without paired vector support");
11300 Align Alignment = LN->getAlign();
11301 SmallVector<SDValue, 4> Loads;
11302 SmallVector<SDValue, 4> LoadChains;
11303 unsigned NumVecs = VT.getSizeInBits() / 128;
11304 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11305 SDValue Load =
11306 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11307 LN->getPointerInfo().getWithOffset(Idx * 16),
11308 commonAlignment(Alignment, Idx * 16),
11309 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11310 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11311 DAG.getConstant(16, dl, BasePtr.getValueType()));
11312 Loads.push_back(Load);
11313 LoadChains.push_back(Load.getValue(1));
11314 }
11315 if (Subtarget.isLittleEndian()) {
11316 std::reverse(Loads.begin(), Loads.end());
11317 std::reverse(LoadChains.begin(), LoadChains.end());
11318 }
11319 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11320 SDValue Value =
11321 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11322 dl, VT, Loads);
11323 SDValue RetOps[] = {Value, TF};
11324 return DAG.getMergeValues(RetOps, dl);
11325}
11326
11327SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11328 SelectionDAG &DAG) const {
11329 SDLoc dl(Op);
11330 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11331 SDValue StoreChain = SN->getChain();
11332 SDValue BasePtr = SN->getBasePtr();
11333 SDValue Value = SN->getValue();
11334 SDValue Value2 = SN->getValue();
11335 EVT StoreVT = Value.getValueType();
11336
11337 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11338 return Op;
11339
11340 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11341 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11342 // underlying registers individually.
11343 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11344 "Type unsupported without MMA");
11345 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11346 "Type unsupported without paired vector support");
11347 Align Alignment = SN->getAlign();
11348 SmallVector<SDValue, 4> Stores;
11349 unsigned NumVecs = 2;
11350 if (StoreVT == MVT::v512i1) {
11351 if (Subtarget.isISAFuture()) {
11352 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11353 MachineSDNode *ExtNode = DAG.getMachineNode(
11354 PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
11355
11356 Value = SDValue(ExtNode, 0);
11357 Value2 = SDValue(ExtNode, 1);
11358 } else
11359 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11360 NumVecs = 4;
11361 }
11362 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11363 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11364 SDValue Elt;
11365 if (Subtarget.isISAFuture()) {
11366 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11367 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11368 Idx > 1 ? Value2 : Value,
11369 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11370 } else
11371 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11372 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11373
11374 SDValue Store =
11375 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11376 SN->getPointerInfo().getWithOffset(Idx * 16),
11377 commonAlignment(Alignment, Idx * 16),
11378 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11379 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11380 DAG.getConstant(16, dl, BasePtr.getValueType()));
11381 Stores.push_back(Store);
11382 }
11383 SDValue TF = DAG.getTokenFactor(dl, Stores);
11384 return TF;
11385}
11386
11387SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11388 SDLoc dl(Op);
11389 if (Op.getValueType() == MVT::v4i32) {
11390 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11391
11392 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11393 // -16 acts as a +16 shift amount: vector rotate counts use only the low 5 bits.
11394 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11395 SDValue RHSSwap = // = vrlw RHS, 16
11396 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11397
11398 // Shrinkify inputs to v8i16.
11399 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11400 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11401 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11402
11403 // Low parts multiplied together, generating 32-bit results (we ignore the
11404 // top parts).
11405 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11406 LHS, RHS, DAG, dl, MVT::v4i32);
11407
11408 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11409 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11410 // Shift the high parts up 16 bits.
11411 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11412 Neg16, DAG, dl);
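// Per 32-bit lane this computes x * y mod 2^32 as
// lo(x)*lo(y) + ((lo(x)*hi(y) + hi(x)*lo(y)) << 16): vmulouh supplies the
// low product, and vmsumuhm of LHS with the rotated RHS supplies the sum
// of the two cross products.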
11413 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11414 } else if (Op.getValueType() == MVT::v16i8) {
11415 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11416 bool isLittleEndian = Subtarget.isLittleEndian();
11417
11418 // Multiply the even 8-bit parts, producing 16-bit sums.
11419 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11420 LHS, RHS, DAG, dl, MVT::v8i16);
11421 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11422
11423 // Multiply the odd 8-bit parts, producing 16-bit sums.
11424 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11425 LHS, RHS, DAG, dl, MVT::v8i16);
11426 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11427
11428 // Merge the results together. Because vmuleub and vmuloub are
11429 // instructions with a big-endian bias, we must reverse the
11430 // element numbering and reverse the meaning of "odd" and "even"
11431 // when generating little endian code.
11432 int Ops[16];
11433 for (unsigned i = 0; i != 8; ++i) {
11434 if (isLittleEndian) {
11435 Ops[i*2 ] = 2*i;
11436 Ops[i*2+1] = 2*i+16;
11437 } else {
11438 Ops[i*2 ] = 2*i+1;
11439 Ops[i*2+1] = 2*i+1+16;
11440 }
11441 }
11442 if (isLittleEndian)
11443 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11444 else
11445 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11446 } else {
11447 llvm_unreachable("Unknown mul to lower!");
11448 }
11449}
11450
11451SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11452 bool IsStrict = Op->isStrictFPOpcode();
11453 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11454 !Subtarget.hasP9Vector())
11455 return SDValue();
11456
11457 return Op;
11458}
11459
11460 // Custom lowering for fpext v2f32 to v2f64
11461SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11462
11463 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11464 "Should only be called for ISD::FP_EXTEND");
11465
11466 // FIXME: handle extends from half precision float vectors on P9.
11467 // We only want to custom lower an extend from v2f32 to v2f64.
11468 if (Op.getValueType() != MVT::v2f64 ||
11469 Op.getOperand(0).getValueType() != MVT::v2f32)
11470 return SDValue();
11471
11472 SDLoc dl(Op);
11473 SDValue Op0 = Op.getOperand(0);
11474
11475 switch (Op0.getOpcode()) {
11476 default:
11477 return SDValue();
11478 case ISD::EXTRACT_SUBVECTOR: {
11479 assert(Op0.getNumOperands() == 2 &&
11480 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11481 "Node should have 2 operands with second one being a constant!");
11482
11483 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11484 return SDValue();
11485
11486 // Custom lower is only done for high or low doubleword.
11487 int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11488 if (Idx % 2 != 0)
11489 return SDValue();
11490
11491 // Since input is v4f32, at this point Idx is either 0 or 2.
11492 // Shift to get the doubleword position we want.
11493 int DWord = Idx >> 1;
11494
11495 // High and low word positions are different on little endian.
11496 if (Subtarget.isLittleEndian())
11497 DWord ^= 0x1;
11498
11499 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11500 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11501 }
11502 case ISD::FADD:
11503 case ISD::FMUL:
11504 case ISD::FSUB: {
11505 SDValue NewLoad[2];
11506 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11507 // Ensure both input are loads.
11508 SDValue LdOp = Op0.getOperand(i);
11509 if (LdOp.getOpcode() != ISD::LOAD)
11510 return SDValue();
11511 // Generate new load node.
11512 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11513 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11514 NewLoad[i] = DAG.getMemIntrinsicNode(
11515 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11516 LD->getMemoryVT(), LD->getMemOperand());
11517 }
11518 SDValue NewOp =
11519 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11520 NewLoad[1], Op0.getNode()->getFlags());
11521 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11522 DAG.getConstant(0, dl, MVT::i32));
11523 }
11524 case ISD::LOAD: {
11525 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11526 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11527 SDValue NewLd = DAG.getMemIntrinsicNode(
11528 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11529 LD->getMemoryVT(), LD->getMemOperand());
11530 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11531 DAG.getConstant(0, dl, MVT::i32));
11532 }
11533 }
11534 llvm_unreachable("ERROR: Should return for all cases within switch.");
11535}
11536
11537/// LowerOperation - Provide custom lowering hooks for some operations.
11538///
11540 switch (Op.getOpcode()) {
11541 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11542 case ISD::FPOW: return lowerPow(Op, DAG);
11543 case ISD::FSIN: return lowerSin(Op, DAG);
11544 case ISD::FCOS: return lowerCos(Op, DAG);
11545 case ISD::FLOG: return lowerLog(Op, DAG);
11546 case ISD::FLOG10: return lowerLog10(Op, DAG);
11547 case ISD::FEXP: return lowerExp(Op, DAG);
11548 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11549 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11550 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11551 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11552 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11553 case ISD::STRICT_FSETCC:
11554 case ISD::STRICT_FSETCCS:
11555 case ISD::SETCC: return LowerSETCC(Op, DAG);
11556 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11557 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11558
11559 case ISD::INLINEASM:
11560 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11561 // Variable argument lowering.
11562 case ISD::VASTART: return LowerVASTART(Op, DAG);
11563 case ISD::VAARG: return LowerVAARG(Op, DAG);
11564 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11565
11566 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11567 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11568 case ISD::GET_DYNAMIC_AREA_OFFSET:
11569 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11570
11571 // Exception handling lowering.
11572 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11573 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11574 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11575
11576 case ISD::LOAD: return LowerLOAD(Op, DAG);
11577 case ISD::STORE: return LowerSTORE(Op, DAG);
11578 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11579 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11580 case ISD::STRICT_FP_TO_UINT:
11581 case ISD::STRICT_FP_TO_SINT:
11582 case ISD::FP_TO_UINT:
11583 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11584 case ISD::STRICT_UINT_TO_FP:
11585 case ISD::STRICT_SINT_TO_FP:
11586 case ISD::UINT_TO_FP:
11587 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11588 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11589
11590 // Lower 64-bit shifts.
11591 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11592 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11593 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11594
11595 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11596 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11597
11598 // Vector-related lowering.
11599 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11600 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11601 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11602 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11603 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11604 case ISD::MUL: return LowerMUL(Op, DAG);
11605 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11606 case ISD::STRICT_FP_ROUND:
11607 case ISD::FP_ROUND:
11608 return LowerFP_ROUND(Op, DAG);
11609 case ISD::ROTL: return LowerROTL(Op, DAG);
11610
11611 // For counter-based loop handling.
11612 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11613
11614 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11615
11616 // Frame & Return address.
11617 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11618 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11619
11621 return LowerINTRINSIC_VOID(Op, DAG);
11622 case ISD::BSWAP:
11623 return LowerBSWAP(Op, DAG);
11624 case ISD::ATOMIC_CMP_SWAP:
11625 return LowerATOMIC_CMP_SWAP(Op, DAG);
11626 case ISD::ATOMIC_STORE:
11627 return LowerATOMIC_LOAD_STORE(Op, DAG);
11628 case ISD::IS_FPCLASS:
11629 return LowerIS_FPCLASS(Op, DAG);
11630 }
11631}
11632
11633 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11634 SmallVectorImpl<SDValue> &Results,
11635 SelectionDAG &DAG) const {
11636 SDLoc dl(N);
11637 switch (N->getOpcode()) {
11638 default:
11639 llvm_unreachable("Do not know how to custom type legalize this operation!");
11640 case ISD::ATOMIC_LOAD: {
11641 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11642 Results.push_back(Res);
11643 Results.push_back(Res.getValue(1));
11644 break;
11645 }
11646 case ISD::READCYCLECOUNTER: {
11647 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11648 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11649
11650 Results.push_back(
11651 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11652 Results.push_back(RTB.getValue(2));
11653 break;
11654 }
11655 case ISD::INTRINSIC_W_CHAIN: {
11656 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11657 Intrinsic::loop_decrement)
11658 break;
11659
11660 assert(N->getValueType(0) == MVT::i1 &&
11661 "Unexpected result type for CTR decrement intrinsic");
11663 N->getValueType(0));
11664 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11665 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11666 N->getOperand(1));
11667
11668 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11669 Results.push_back(NewInt.getValue(1));
11670 break;
11671 }
11672 case ISD::INTRINSIC_WO_CHAIN: {
11673 switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
11674 case Intrinsic::ppc_pack_longdouble:
11675 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11676 N->getOperand(2), N->getOperand(1)));
11677 break;
11678 case Intrinsic::ppc_maxfe:
11679 case Intrinsic::ppc_minfe:
11680 case Intrinsic::ppc_fnmsub:
11681 case Intrinsic::ppc_convert_f128_to_ppcf128:
11682 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11683 break;
11684 }
11685 break;
11686 }
11687 case ISD::VAARG: {
11688 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11689 return;
11690
11691 EVT VT = N->getValueType(0);
11692
11693 if (VT == MVT::i64) {
11694 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11695
11696 Results.push_back(NewNode);
11697 Results.push_back(NewNode.getValue(1));
11698 }
11699 return;
11700 }
11701 case ISD::STRICT_FP_TO_SINT:
11702 case ISD::STRICT_FP_TO_UINT:
11703 case ISD::FP_TO_SINT:
11704 case ISD::FP_TO_UINT: {
11705 // LowerFP_TO_INT() can only handle f32 and f64.
11706 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11707 MVT::ppcf128)
11708 return;
11709 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11710 Results.push_back(LoweredValue);
11711 if (N->isStrictFPOpcode())
11712 Results.push_back(LoweredValue.getValue(1));
11713 return;
11714 }
11715 case ISD::TRUNCATE: {
11716 if (!N->getValueType(0).isVector())
11717 return;
11718 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11719 if (Lowered)
11720 Results.push_back(Lowered);
11721 return;
11722 }
11723 case ISD::FSHL:
11724 case ISD::FSHR:
11725 // Don't handle funnel shifts here.
11726 return;
11727 case ISD::BITCAST:
11728 // Don't handle bitcast here.
11729 return;
11730 case ISD::FP_EXTEND:
11731 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11732 if (Lowered)
11733 Results.push_back(Lowered);
11734 return;
11735 }
11736}
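
// Illustrative sketch (added commentary): on a 32-bit subtarget i64 is not a
// legal type, so @llvm.readcyclecounter is legalized by the
// ISD::READCYCLECOUNTER case above: PPCISD::READ_TIME_BASE produces the two
// 32-bit halves of the time base, BUILD_PAIR glues them back into the i64
// result, and the chain is returned as the second result.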
11737
11738//===----------------------------------------------------------------------===//
11739// Other Lowering Code
11740//===----------------------------------------------------------------------===//
11741
11742static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11743 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11744 Function *Func = Intrinsic::getDeclaration(M, Id);
11745 return Builder.CreateCall(Func, {});
11746}
11747
11748 // The mappings for emitLeading/TrailingFence are taken from
11749// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11750Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11751 Instruction *Inst,
11752 AtomicOrdering Ord) const {
11753 if (Ord == AtomicOrdering::SequentiallyConsistent)
11754 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11755 if (isReleaseOrStronger(Ord))
11756 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11757 return nullptr;
11758}
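
// Summary of the leading-fence mapping implemented above (derived from the
// code, following the cited C++11 mapping tables):
//   seq_cst          -> sync   (Intrinsic::ppc_sync)
//   release/acq_rel  -> lwsync (Intrinsic::ppc_lwsync)
//   weaker orderings -> no fence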
11759
11760Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11761 Instruction *Inst,
11762 AtomicOrdering Ord) const {
11763 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11764 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11765 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11766 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11767 if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11768 return Builder.CreateCall(
11769 Intrinsic::getDeclaration(
11770 Builder.GetInsertBlock()->getParent()->getParent(),
11771 Intrinsic::ppc_cfence, {Inst->getType()}),
11772 {Inst});
11773 // FIXME: Can use isync for rmw operation.
11774 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11775 }
11776 return nullptr;
11777}
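
// Likewise for the trailing fence (derived from the code above): an
// acquire-or-stronger load on PPC64 gets a control-dependent ppc_cfence on
// the loaded value, which is later expanded to a compare/branch/isync
// sequence; other acquire-or-stronger atomics fall back to lwsync. Roughly:
//
//   %v = load atomic i64, ptr %p acquire, align 8
//   ; becomes: %v = <load>; call @llvm.ppc.cfence.i64(i64 %v)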
11778
11779MachineBasicBlock *
11780PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11781 unsigned AtomicSize,
11782 unsigned BinOpcode,
11783 unsigned CmpOpcode,
11784 unsigned CmpPred) const {
11785 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11786 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11787
11788 auto LoadMnemonic = PPC::LDARX;
11789 auto StoreMnemonic = PPC::STDCX;
11790 switch (AtomicSize) {
11791 default:
11792 llvm_unreachable("Unexpected size of atomic entity");
11793 case 1:
11794 LoadMnemonic = PPC::LBARX;
11795 StoreMnemonic = PPC::STBCX;
11796 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
11797 break;
11798 case 2:
11799 LoadMnemonic = PPC::LHARX;
11800 StoreMnemonic = PPC::STHCX;
11801 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
11802 break;
11803 case 4:
11804 LoadMnemonic = PPC::LWARX;
11805 StoreMnemonic = PPC::STWCX;
11806 break;
11807 case 8:
11808 LoadMnemonic = PPC::LDARX;
11809 StoreMnemonic = PPC::STDCX;
11810 break;
11811 }
11812
11813 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11814 MachineFunction *F = BB->getParent();
11815 MachineFunction::iterator It = ++BB->getIterator();
11816
11817 Register dest = MI.getOperand(0).getReg();
11818 Register ptrA = MI.getOperand(1).getReg();
11819 Register ptrB = MI.getOperand(2).getReg();
11820 Register incr = MI.getOperand(3).getReg();
11821 DebugLoc dl = MI.getDebugLoc();
11822
11823 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11824 MachineBasicBlock *loop2MBB =
11825 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11826 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11827 F->insert(It, loopMBB);
11828 if (CmpOpcode)
11829 F->insert(It, loop2MBB);
11830 F->insert(It, exitMBB);
11831 exitMBB->splice(exitMBB->begin(), BB,
11832 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11833 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11834
11835 MachineRegisterInfo &RegInfo = F->getRegInfo();
11836 Register TmpReg = (!BinOpcode) ? incr :
11837 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11838 : &PPC::GPRCRegClass);
11839
11840 // thisMBB:
11841 // ...
11842 // fallthrough --> loopMBB
11843 BB->addSuccessor(loopMBB);
11844
11845 // loopMBB:
11846 // l[wd]arx dest, ptr
11847 // add r0, dest, incr
11848 // st[wd]cx. r0, ptr
11849 // bne- loopMBB
11850 // fallthrough --> exitMBB
11851
11852 // For max/min...
11853 // loopMBB:
11854 // l[wd]arx dest, ptr
11855 // cmpl?[wd] dest, incr
11856 // bgt exitMBB
11857 // loop2MBB:
11858 // st[wd]cx. dest, ptr
11859 // bne- loopMBB
11860 // fallthrough --> exitMBB
11861
11862 BB = loopMBB;
11863 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11864 .addReg(ptrA).addReg(ptrB);
11865 if (BinOpcode)
11866 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11867 if (CmpOpcode) {
11868 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11869 // Signed comparisons of byte or halfword values must be sign-extended.
11870 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11871 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11872 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11873 ExtReg).addReg(dest);
11874 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
11875 } else
11876 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
11877
11878 BuildMI(BB, dl, TII->get(PPC::BCC))
11879 .addImm(CmpPred)
11880 .addReg(CrReg)
11881 .addMBB(exitMBB);
11882 BB->addSuccessor(loop2MBB);
11883 BB->addSuccessor(exitMBB);
11884 BB = loop2MBB;
11885 }
11886 BuildMI(BB, dl, TII->get(StoreMnemonic))
11887 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11888 BuildMI(BB, dl, TII->get(PPC::BCC))
11889 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11890 BB->addSuccessor(loopMBB);
11891 BB->addSuccessor(exitMBB);
11892
11893 // exitMBB:
11894 // ...
11895 BB = exitMBB;
11896 return BB;
11897}
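
// Usage note (illustrative; these exact calls appear in
// EmitInstrWithCustomInserter below): a plain fetch-and-add passes a real
// BinOpcode,
//
//   EmitAtomicBinary(MI, BB, 4, PPC::ADD4);                  // i32 add
//
// while min/max flavors pass BinOpcode==0 plus a compare and predicate,
//
//   EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT); // i32 signed min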
11898
11899static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
11900 switch (MI.getOpcode()) {
11901 default:
11902 return false;
11903 case PPC::COPY:
11904 return TII->isSignExtended(MI.getOperand(1).getReg(),
11905 &MI.getMF()->getRegInfo());
11906 case PPC::LHA:
11907 case PPC::LHA8:
11908 case PPC::LHAU:
11909 case PPC::LHAU8:
11910 case PPC::LHAUX:
11911 case PPC::LHAUX8:
11912 case PPC::LHAX:
11913 case PPC::LHAX8:
11914 case PPC::LWA:
11915 case PPC::LWAUX:
11916 case PPC::LWAX:
11917 case PPC::LWAX_32:
11918 case PPC::LWA_32:
11919 case PPC::PLHA:
11920 case PPC::PLHA8:
11921 case PPC::PLHA8pc:
11922 case PPC::PLHApc:
11923 case PPC::PLWA:
11924 case PPC::PLWA8:
11925 case PPC::PLWA8pc:
11926 case PPC::PLWApc:
11927 case PPC::EXTSB:
11928 case PPC::EXTSB8:
11929 case PPC::EXTSB8_32_64:
11930 case PPC::EXTSB8_rec:
11931 case PPC::EXTSB_rec:
11932 case PPC::EXTSH:
11933 case PPC::EXTSH8:
11934 case PPC::EXTSH8_32_64:
11935 case PPC::EXTSH8_rec:
11936 case PPC::EXTSH_rec:
11937 case PPC::EXTSW:
11938 case PPC::EXTSWSLI:
11939 case PPC::EXTSWSLI_32_64:
11940 case PPC::EXTSWSLI_32_64_rec:
11941 case PPC::EXTSWSLI_rec:
11942 case PPC::EXTSW_32:
11943 case PPC::EXTSW_32_64:
11944 case PPC::EXTSW_32_64_rec:
11945 case PPC::EXTSW_rec:
11946 case PPC::SRAW:
11947 case PPC::SRAWI:
11948 case PPC::SRAWI_rec:
11949 case PPC::SRAW_rec:
11950 return true;
11951 }
11952 return false;
11953}
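
// For example, a value produced by EXTSH (e.g. from 'sext i16 %x to i32') is
// reported as sign-extended here, so EmitPartwordAtomicBinary below can skip
// the extra EXTSB/EXTSH it would otherwise emit before a signed compare.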
11954
11955MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11956 MachineInstr &MI, MachineBasicBlock *BB,
11957 bool is8bit, // operation
11958 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11959 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11960 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11961
11962 // If this is a signed comparison and the value being compared is not known
11963 // to be sign extended, sign extend it here.
11964 DebugLoc dl = MI.getDebugLoc();
11965 MachineFunction *F = BB->getParent();
11966 MachineRegisterInfo &RegInfo = F->getRegInfo();
11967 Register incr = MI.getOperand(3).getReg();
11968 bool IsSignExtended =
11969 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
11970
11971 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11972 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11973 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11974 .addReg(MI.getOperand(3).getReg());
11975 MI.getOperand(3).setReg(ValueReg);
11976 incr = ValueReg;
11977 }
11978 // If we support part-word atomic mnemonics, just use them.
11979 if (Subtarget.hasPartwordAtomics())
11980 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11981 CmpPred);
11982
11983 // In 64 bit mode we have to use 64 bits for addresses, even though the
11984 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11985 // registers without caring whether they're 32 or 64, but here we're
11986 // doing actual arithmetic on the addresses.
11987 bool is64bit = Subtarget.isPPC64();
11988 bool isLittleEndian = Subtarget.isLittleEndian();
11989 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11990
11991 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11992 MachineFunction::iterator It = ++BB->getIterator();
11993
11994 Register dest = MI.getOperand(0).getReg();
11995 Register ptrA = MI.getOperand(1).getReg();
11996 Register ptrB = MI.getOperand(2).getReg();
11997
11998 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11999 MachineBasicBlock *loop2MBB =
12000 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12001 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12002 F->insert(It, loopMBB);
12003 if (CmpOpcode)
12004 F->insert(It, loop2MBB);
12005 F->insert(It, exitMBB);
12006 exitMBB->splice(exitMBB->begin(), BB,
12007 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12008 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12009
12010 const TargetRegisterClass *RC =
12011 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12012 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12013
12014 Register PtrReg = RegInfo.createVirtualRegister(RC);
12015 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12016 Register ShiftReg =
12017 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12018 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12019 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12020 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12021 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12022 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12023 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12024 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12025 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12026 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12027 Register Ptr1Reg;
12028 Register TmpReg =
12029 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12030
12031 // thisMBB:
12032 // ...
12033 // fallthrough --> loopMBB
12034 BB->addSuccessor(loopMBB);
12035
12036 // The 4-byte load must be aligned, while a char or short may be
12037 // anywhere in the word. Hence all this nasty bookkeeping code.
12038 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12039 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12040 // xori shift, shift1, 24 [16]
12041 // rlwinm ptr, ptr1, 0, 0, 29
12042 // slw incr2, incr, shift
12043 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12044 // slw mask, mask2, shift
12045 // loopMBB:
12046 // lwarx tmpDest, ptr
12047 // add tmp, tmpDest, incr2
12048 // andc tmp2, tmpDest, mask
12049 // and tmp3, tmp, mask
12050 // or tmp4, tmp3, tmp2
12051 // stwcx. tmp4, ptr
12052 // bne- loopMBB
12053 // fallthrough --> exitMBB
12054 // srw SrwDest, tmpDest, shift
12055 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
12056 if (ptrA != ZeroReg) {
12057 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12058 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12059 .addReg(ptrA)
12060 .addReg(ptrB);
12061 } else {
12062 Ptr1Reg = ptrB;
12063 }
12064 // We need to use a 32-bit subregister to avoid a register class mismatch in
12065 // 64-bit mode.
12066 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12067 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12068 .addImm(3)
12069 .addImm(27)
12070 .addImm(is8bit ? 28 : 27);
12071 if (!isLittleEndian)
12072 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12073 .addReg(Shift1Reg)
12074 .addImm(is8bit ? 24 : 16);
12075 if (is64bit)
12076 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12077 .addReg(Ptr1Reg)
12078 .addImm(0)
12079 .addImm(61);
12080 else
12081 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12082 .addReg(Ptr1Reg)
12083 .addImm(0)
12084 .addImm(0)
12085 .addImm(29);
12086 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12087 if (is8bit)
12088 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12089 else {
12090 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12091 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12092 .addReg(Mask3Reg)
12093 .addImm(65535);
12094 }
12095 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12096 .addReg(Mask2Reg)
12097 .addReg(ShiftReg);
12098
12099 BB = loopMBB;
12100 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12101 .addReg(ZeroReg)
12102 .addReg(PtrReg);
12103 if (BinOpcode)
12104 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12105 .addReg(Incr2Reg)
12106 .addReg(TmpDestReg);
12107 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12108 .addReg(TmpDestReg)
12109 .addReg(MaskReg);
12110 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12111 if (CmpOpcode) {
12112 // For unsigned comparisons, we can directly compare the shifted values.
12113 // For signed comparisons we shift and sign extend.
12114 Register SReg = RegInfo.createVirtualRegister(GPRC);
12115 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12116 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12117 .addReg(TmpDestReg)
12118 .addReg(MaskReg);
12119 unsigned ValueReg = SReg;
12120 unsigned CmpReg = Incr2Reg;
12121 if (CmpOpcode == PPC::CMPW) {
12122 ValueReg = RegInfo.createVirtualRegister(GPRC);
12123 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12124 .addReg(SReg)
12125 .addReg(ShiftReg);
12126 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12127 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12128 .addReg(ValueReg);
12129 ValueReg = ValueSReg;
12130 CmpReg = incr;
12131 }
12132 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12133 BuildMI(BB, dl, TII->get(PPC::BCC))
12134 .addImm(CmpPred)
12135 .addReg(CrReg)
12136 .addMBB(exitMBB);
12137 BB->addSuccessor(loop2MBB);
12138 BB->addSuccessor(exitMBB);
12139 BB = loop2MBB;
12140 }
12141 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12142 BuildMI(BB, dl, TII->get(PPC::STWCX))
12143 .addReg(Tmp4Reg)
12144 .addReg(ZeroReg)
12145 .addReg(PtrReg);
12146 BuildMI(BB, dl, TII->get(PPC::BCC))
12147 .addImm(PPC::PRED_NE)
12148 .addReg(PPC::CR0)
12149 .addMBB(loopMBB);
12150 BB->addSuccessor(loopMBB);
12151 BB->addSuccessor(exitMBB);
12152
12153 // exitMBB:
12154 // ...
12155 BB = exitMBB;
12156 // Since the shift amount is not a constant, we need to clear
12157 // the upper bits with a separate RLWINM.
12158 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12159 .addReg(SrwDestReg)
12160 .addImm(0)
12161 .addImm(is8bit ? 24 : 16)
12162 .addImm(31);
12163 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12164 .addReg(TmpDestReg)
12165 .addReg(ShiftReg);
12166 return BB;
12167}
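
// Worked example of the bookkeeping above (illustrative, big-endian, byte
// case). For a byte at ptr1 = 0x...13, i.e. byte 3 of its word:
//
//   rlwinm shift1, ptr1, 3, 27, 28   -> shift1 = (ptr1 & 3) << 3 = 24
//   xori   shift, shift1, 24         -> shift  = 0  (byte 3 is bits 0..7 on BE)
//   rlwinm ptr, ptr1, 0, 0, 29       -> ptr    = 0x...10 (aligned word)
//
// incr and the 0xFF mask are then shifted into that lane, the lwarx/stwcx.
// loop operates on the whole word, and the final srw/rlwinm pair moves the
// old byte back down and clears the upper bits. On little-endian targets
// shift == shift1 and the xori is omitted.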
12168
12169MachineBasicBlock *
12170PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12171 MachineBasicBlock *MBB) const {
12172 DebugLoc DL = MI.getDebugLoc();
12173 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12174 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12175
12176 MachineFunction *MF = MBB->getParent();
12177 MachineRegisterInfo &MRI = MF->getRegInfo();
12178
12179 const BasicBlock *BB = MBB->getBasicBlock();
12180 MachineFunction::iterator I = ++MBB->getIterator();
12181
12182 Register DstReg = MI.getOperand(0).getReg();
12183 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12184 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12185 Register mainDstReg = MRI.createVirtualRegister(RC);
12186 Register restoreDstReg = MRI.createVirtualRegister(RC);
12187
12188 MVT PVT = getPointerTy(MF->getDataLayout());
12189 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12190 "Invalid Pointer Size!");
12191 // For v = setjmp(buf), we generate
12192 //
12193 // thisMBB:
12194 // SjLjSetup mainMBB
12195 // bl mainMBB
12196 // v_restore = 1
12197 // b sinkMBB
12198 //
12199 // mainMBB:
12200 // buf[LabelOffset] = LR
12201 // v_main = 0
12202 //
12203 // sinkMBB:
12204 // v = phi(main, restore)
12205 //
12206
12207 MachineBasicBlock *thisMBB = MBB;
12208 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12209 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12210 MF->insert(I, mainMBB);
12211 MF->insert(I, sinkMBB);
12212
12213 MachineInstrBuilder MIB;
12214
12215 // Transfer the remainder of BB and its successor edges to sinkMBB.
12216 sinkMBB->splice(sinkMBB->begin(), MBB,
12217 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12218 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12219
12220 // Note that the structure of the jmp_buf used here is not compatible
12221 // with that used by libc, and is not designed to be. Specifically, it
12222 // stores only those 'reserved' registers that LLVM does not otherwise
12223 // understand how to spill. Also, by convention, by the time this
12224 // intrinsic is called, Clang has already stored the frame address in the
12225 // first slot of the buffer and stack address in the third. Following the
12226 // X86 target code, we'll store the jump address in the second slot. We also
12227 // need to save the TOC pointer (R2) to handle jumps between shared
12228 // libraries, and that will be stored in the fourth slot. The thread
12229 // identifier (R13) is not affected.
12230
12231 // thisMBB:
12232 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12233 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12234 const int64_t BPOffset = 4 * PVT.getStoreSize();
12235
12236 // Prepare the IP in a register.
12237 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12238 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12239 Register BufReg = MI.getOperand(1).getReg();
12240
12241 if (Subtarget.is64BitELFABI()) {
12242 setUsesTOCBasePtr(*MBB->getParent());
12243 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12244 .addReg(PPC::X2)
12245 .addImm(TOCOffset)
12246 .addReg(BufReg)
12247 .cloneMemRefs(MI);
12248 }
12249
12250 // Naked functions never have a base pointer, and so we use r1. For all
12251 // other functions, this decision must be delayed until during PEI.
12252 unsigned BaseReg;
12253 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12254 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12255 else
12256 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12257
12258 MIB = BuildMI(*thisMBB, MI, DL,
12259 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12260 .addReg(BaseReg)
12261 .addImm(BPOffset)
12262 .addReg(BufReg)
12263 .cloneMemRefs(MI);
12264
12265 // Setup
12266 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12267 MIB.addRegMask(TRI->getNoPreservedMask());
12268
12269 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12270
12271 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12272 .addMBB(mainMBB);
12273 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12274
12275 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12276 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12277
12278 // mainMBB:
12279 // mainDstReg = 0
12280 MIB =
12281 BuildMI(mainMBB, DL,
12282 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12283
12284 // Store IP
12285 if (Subtarget.isPPC64()) {
12286 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12287 .addReg(LabelReg)
12288 .addImm(LabelOffset)
12289 .addReg(BufReg);
12290 } else {
12291 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12292 .addReg(LabelReg)
12293 .addImm(LabelOffset)
12294 .addReg(BufReg);
12295 }
12296 MIB.cloneMemRefs(MI);
12297
12298 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12299 mainMBB->addSuccessor(sinkMBB);
12300
12301 // sinkMBB:
12302 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12303 TII->get(PPC::PHI), DstReg)
12304 .addReg(mainDstReg).addMBB(mainMBB)
12305 .addReg(restoreDstReg).addMBB(thisMBB);
12306
12307 MI.eraseFromParent();
12308 return sinkMBB;
12309}
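
// Layout of the buffer used by this lowering, in slots of PVT.getStoreSize()
// bytes (summarized from the offsets above and in emitEHSjLjLongJmp below):
//
//   buf[0]  frame address (stored by the front end)
//   buf[1]  IP / resume address   (LabelOffset)
//   buf[2]  SP                    (SPOffset, read back by longjmp)
//   buf[3]  TOC pointer (X2)      (TOCOffset)
//   buf[4]  base pointer          (BPOffset)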
12310
12311MachineBasicBlock *
12312PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12313 MachineBasicBlock *MBB) const {
12314 DebugLoc DL = MI.getDebugLoc();
12315 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12316
12317 MachineFunction *MF = MBB->getParent();
12318 MachineRegisterInfo &MRI = MF->getRegInfo();
12319
12320 MVT PVT = getPointerTy(MF->getDataLayout());
12321 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12322 "Invalid Pointer Size!");
12323
12324 const TargetRegisterClass *RC =
12325 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12326 Register Tmp = MRI.createVirtualRegister(RC);
12327 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12328 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12329 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12330 unsigned BP =
12331 (PVT == MVT::i64)
12332 ? PPC::X30
12333 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12334 : PPC::R30);
12335
12336 MachineInstrBuilder MIB;
12337
12338 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12339 const int64_t SPOffset = 2 * PVT.getStoreSize();
12340 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12341 const int64_t BPOffset = 4 * PVT.getStoreSize();
12342
12343 Register BufReg = MI.getOperand(0).getReg();
12344
12345 // Reload FP (the jumped-to function may not have had a
12346 // frame pointer, and if so, then its r31 will be restored
12347 // as necessary).
12348 if (PVT == MVT::i64) {
12349 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12350 .addImm(0)
12351 .addReg(BufReg);
12352 } else {
12353 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12354 .addImm(0)
12355 .addReg(BufReg);
12356 }
12357 MIB.cloneMemRefs(MI);
12358
12359 // Reload IP
12360 if (PVT == MVT::i64) {
12361 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12362 .addImm(LabelOffset)
12363 .addReg(BufReg);
12364 } else {
12365 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12366 .addImm(LabelOffset)
12367 .addReg(BufReg);
12368 }
12369 MIB.cloneMemRefs(MI);
12370
12371 // Reload SP
12372 if (PVT == MVT::i64) {
12373 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12374 .addImm(SPOffset)
12375 .addReg(BufReg);
12376 } else {
12377 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12378 .addImm(SPOffset)
12379 .addReg(BufReg);
12380 }
12381 MIB.cloneMemRefs(MI);
12382
12383 // Reload BP
12384 if (PVT == MVT::i64) {
12385 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12386 .addImm(BPOffset)
12387 .addReg(BufReg);
12388 } else {
12389 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12390 .addImm(BPOffset)
12391 .addReg(BufReg);
12392 }
12393 MIB.cloneMemRefs(MI);
12394
12395 // Reload TOC
12396 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12397 setUsesTOCBasePtr(*MBB->getParent());
12398 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12399 .addImm(TOCOffset)
12400 .addReg(BufReg)
12401 .cloneMemRefs(MI);
12402 }
12403
12404 // Jump
12405 BuildMI(*MBB, MI, DL,
12406 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12407 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12408
12409 MI.eraseFromParent();
12410 return MBB;
12411}
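
// The resulting restore sequence is roughly the following (64-bit case; a
// sketch assembled from the BuildMI calls above, where 'buf' is BufReg):
//
//   ld r31, 0(buf)     ; FP
//   ld tmp, 8(buf)     ; IP
//   ld r1, 16(buf)     ; SP
//   ld r30, 32(buf)    ; BP
//   ld r2, 24(buf)     ; TOC (SVR4 ABI only)
//   mtctr tmp
//   bctr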
12412
12413bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12414 // If the function specifically requests inline stack probes, emit them.
12415 if (MF.getFunction().hasFnAttribute("probe-stack"))
12416 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12417 "inline-asm";
12418 return false;
12419}
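
// For example, inline probing is requested at the IR level with the function
// attribute:
//
//   define void @f() "probe-stack"="inline-asm" { ... }
//
// Any other value (or the absence of the attribute) leaves inline probing
// disabled here.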
12420
12421unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12422 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12423 unsigned StackAlign = TFI->getStackAlignment();
12424 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12425 "Unexpected stack alignment");
12426 // The default stack probe size is 4096 if the function has no
12427 // stack-probe-size attribute.
12428 const Function &Fn = MF.getFunction();
12429 unsigned StackProbeSize =
12430 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12431 // Round down to the stack alignment.
12432 StackProbeSize &= ~(StackAlign - 1);
12433 return StackProbeSize ? StackProbeSize : StackAlign;
12434}
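
// Worked example: with a 16-byte stack alignment and
// "stack-probe-size"="5000", the probe size is rounded down to
// 5000 & ~15 = 4992. A value that rounds down to zero (e.g. 8) falls back to
// the stack alignment itself.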
12435
12436// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12437// into three phases. In the first phase, it uses the pseudo instruction
12438// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
12439// and FinalStackPtr. In the second phase, it generates a loop for probing
12440// blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
12441// future result of MaxCallFrameSize so that it can calculate the correct data area pointer.
12442MachineBasicBlock *
12443PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12444 MachineBasicBlock *MBB) const {
12445 const bool isPPC64 = Subtarget.isPPC64();
12446 MachineFunction *MF = MBB->getParent();
12447 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12448 DebugLoc DL = MI.getDebugLoc();
12449 const unsigned ProbeSize = getStackProbeSize(*MF);
12450 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12451 MachineRegisterInfo &MRI = MF->getRegInfo();
12452 // The CFG of the stack-probing code looks like this:
12453 // +-----+
12454 // | MBB |
12455 // +--+--+
12456 // |
12457 // +----v----+
12458 // +--->+ TestMBB +---+
12459 // | +----+----+ |
12460 // | | |
12461 // | +-----v----+ |
12462 // +---+ BlockMBB | |
12463 // +----------+ |
12464 // |
12465 // +---------+ |
12466 // | TailMBB +<--+
12467 // +---------+
12468 // In MBB, calculate previous frame pointer and final stack pointer.
12469 // In TestMBB, test whether sp is equal to the final stack pointer; if so,
12470 // jump to TailMBB. In BlockMBB, update sp and jump back to TestMBB.
12471 // TailMBB is spliced via \p MI.
12472 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12473 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12474 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12475
12476 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12477 MF->insert(MBBIter, TestMBB);
12478 MF->insert(MBBIter, BlockMBB);
12479 MF->insert(MBBIter, TailMBB);
12480
12481 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12482 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12483
12484 Register DstReg = MI.getOperand(0).getReg();
12485 Register NegSizeReg = MI.getOperand(1).getReg();
12486 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12487 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12488 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12489 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12490
12491 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12492 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12493 // actual FramePointer and NegSize.
12494 unsigned ProbeOpc;
12495 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12496 ProbeOpc =
12497 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12498 else
12499 // When NegSizeReg has only one use (the current MI, which will be replaced
12500 // by PREPARE_PROBED_ALLOCA), use the NEGSIZE_SAME_REG variant so that
12501 // ActualNegSizeReg and NegSizeReg are allocated to the same physical
12502 // register, avoiding a redundant copy.
12503 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12504 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12505 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12506 .addDef(ActualNegSizeReg)
12507 .addReg(NegSizeReg)
12508 .add(MI.getOperand(2))
12509 .add(MI.getOperand(3));
12510
12511 // Calculate final stack pointer, which equals to SP + ActualNegSize.
12512 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12513 FinalStackPtr)
12514 .addReg(SPReg)
12515 .addReg(ActualNegSizeReg);
12516
12517 // Materialize a scratch register for update.
12518 int64_t NegProbeSize = -(int64_t)ProbeSize;
12519 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12520 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12521 if (!isInt<16>(NegProbeSize)) {
12522 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12523 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12524 .addImm(NegProbeSize >> 16);
12525 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12526 ScratchReg)
12527 .addReg(TempReg)
12528 .addImm(NegProbeSize & 0xFFFF);
12529 } else
12530 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12531 .addImm(NegProbeSize);
12532
12533 {
12534 // Probing leading residual part.
12535 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12536 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12537 .addReg(ActualNegSizeReg)
12538 .addReg(ScratchReg);
12539 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12540 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12541 .addReg(Div)
12542 .addReg(ScratchReg);
12543 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12544 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12545 .addReg(Mul)
12546 .addReg(ActualNegSizeReg);
12547 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12548 .addReg(FramePointer)
12549 .addReg(SPReg)
12550 .addReg(NegMod);
12551 }
12552
12553 {
12554 // Remaining part should be multiple of ProbeSize.
12555 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12556 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12557 .addReg(SPReg)
12558 .addReg(FinalStackPtr);
12559 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12560 .addImm(PPC::PRED_EQ)
12561 .addReg(CmpResult)
12562 .addMBB(TailMBB);
12563 TestMBB->addSuccessor(BlockMBB);
12564 TestMBB->addSuccessor(TailMBB);
12565 }
12566
12567 {
12568 // Touch the block.
12569 // |P...|P...|P...
12570 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12571 .addReg(FramePointer)
12572 .addReg(SPReg)
12573 .addReg(ScratchReg);
12574 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12575 BlockMBB->addSuccessor(TestMBB);
12576 }
12577
12578 // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
12579 // insertion; use the DYNAREAOFFSET pseudo instruction to get the future result.
12580 Register MaxCallFrameSizeReg =
12581 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12582 BuildMI(TailMBB, DL,
12583 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12584 MaxCallFrameSizeReg)
12585 .add(MI.getOperand(2))
12586 .add(MI.getOperand(3));
12587 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12588 .addReg(SPReg)
12589 .addReg(MaxCallFrameSizeReg);
12590
12591 // Splice instructions after MI to TailMBB.
12592 TailMBB->splice(TailMBB->end(), MBB,
12593 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12594 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12595 MBB->addSuccessor(TestMBB);
12596
12597 // Delete the pseudo instruction.
12598 MI.eraseFromParent();
12599
12600 ++NumDynamicAllocaProbed;
12601 return TailMBB;
12602}
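
// Putting it together, the emitted probing code is roughly (64-bit case; an
// illustrative sketch assembled from the BuildMI calls above):
//
//   <PREPARE_PROBED_ALLOCA: fp, actualNegSize>
//   add   finalsp, sp, actualNegSize
//   divd  div, actualNegSize, negProbeSize
//   mulld mul, div, negProbeSize
//   subf  negMod, mul, actualNegSize
//   stdux fp, sp, negMod            ; probe the residual part
// TestMBB:
//   cmpd  sp, finalsp
//   beq   TailMBB
// BlockMBB:
//   stdux fp, sp, negProbeSize      ; probe one full block
//   b     TestMBB
// TailMBB:
//   <DYNAREAOFFSET>; add dst, sp, maxCallFrameSize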
12603
12604MachineBasicBlock *
12605PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12606 MachineBasicBlock *BB) const {
12607 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12608 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12609 if (Subtarget.is64BitELFABI() &&
12610 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12611 !Subtarget.isUsingPCRelativeCalls()) {
12612 // Call lowering should have added an r2 operand to indicate a dependence
12613 // on the TOC base pointer value. It can't, however, because there is no
12614 // way to mark the dependence as implicit there, and so the stackmap code
12615 // will confuse it with a regular operand. Instead, add the dependence
12616 // here.
12617 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12618 }
12619
12620 return emitPatchPoint(MI, BB);
12621 }
12622
12623 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12624 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12625 return emitEHSjLjSetJmp(MI, BB);
12626 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12627 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12628 return emitEHSjLjLongJmp(MI, BB);
12629 }
12630
12631 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12632
12633 // To "insert" these instructions we actually have to insert their
12634 // control-flow patterns.
12635 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12636 MachineFunction::iterator It = ++BB->getIterator();
12637
12638 MachineFunction *F = BB->getParent();
12639 MachineRegisterInfo &MRI = F->getRegInfo();
12640
12641 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12642 MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12643 MI.getOpcode() == PPC::SELECT_I8) {
12644 SmallVector<MachineOperand, 2> Cond;
12645 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12646 MI.getOpcode() == PPC::SELECT_CC_I8)
12647 Cond.push_back(MI.getOperand(4));
12648 else
12649 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12650 Cond.push_back(MI.getOperand(1));
12651
12652 DebugLoc dl = MI.getDebugLoc();
12653 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12654 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12655 } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12656 MI.getOpcode() == PPC::SELECT_CC_F8 ||
12657 MI.getOpcode() == PPC::SELECT_CC_F16 ||
12658 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12659 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12660 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12661 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12662 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12663 MI.getOpcode() == PPC::SELECT_CC_SPE ||
12664 MI.getOpcode() == PPC::SELECT_F4 ||
12665 MI.getOpcode() == PPC::SELECT_F8 ||
12666 MI.getOpcode() == PPC::SELECT_F16 ||
12667 MI.getOpcode() == PPC::SELECT_SPE ||
12668 MI.getOpcode() == PPC::SELECT_SPE4 ||
12669 MI.getOpcode() == PPC::SELECT_VRRC ||
12670 MI.getOpcode() == PPC::SELECT_VSFRC ||
12671 MI.getOpcode() == PPC::SELECT_VSSRC ||
12672 MI.getOpcode() == PPC::SELECT_VSRC) {
12673 // The incoming instruction knows the destination vreg to set, the
12674 // condition code register to branch on, the true/false values to
12675 // select between, and a branch opcode to use.
12676
12677 // thisMBB:
12678 // ...
12679 // TrueVal = ...
12680 // cmpTY ccX, r1, r2
12681 // bCC copy1MBB
12682 // fallthrough --> copy0MBB
12683 MachineBasicBlock *thisMBB = BB;
12684 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12685 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12686 DebugLoc dl = MI.getDebugLoc();
12687 F->insert(It, copy0MBB);
12688 F->insert(It, sinkMBB);
12689
12690 // Transfer the remainder of BB and its successor edges to sinkMBB.
12691 sinkMBB->splice(sinkMBB->begin(), BB,
12692 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12693 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12694
12695 // Next, add the true and fallthrough blocks as its successors.
12696 BB->addSuccessor(copy0MBB);
12697 BB->addSuccessor(sinkMBB);
12698
12699 if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12700 MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12701 MI.getOpcode() == PPC::SELECT_F16 ||
12702 MI.getOpcode() == PPC::SELECT_SPE4 ||
12703 MI.getOpcode() == PPC::SELECT_SPE ||
12704 MI.getOpcode() == PPC::SELECT_VRRC ||
12705 MI.getOpcode() == PPC::SELECT_VSFRC ||
12706 MI.getOpcode() == PPC::SELECT_VSSRC ||
12707 MI.getOpcode() == PPC::SELECT_VSRC) {
12708 BuildMI(BB, dl, TII->get(PPC::BC))
12709 .addReg(MI.getOperand(1).getReg())
12710 .addMBB(sinkMBB);
12711 } else {
12712 unsigned SelectPred = MI.getOperand(4).getImm();
12713 BuildMI(BB, dl, TII->get(PPC::BCC))
12714 .addImm(SelectPred)
12715 .addReg(MI.getOperand(1).getReg())
12716 .addMBB(sinkMBB);
12717 }
12718
12719 // copy0MBB:
12720 // %FalseValue = ...
12721 // # fallthrough to sinkMBB
12722 BB = copy0MBB;
12723
12724 // Update machine-CFG edges
12725 BB->addSuccessor(sinkMBB);
12726
12727 // sinkMBB:
12728 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12729 // ...
12730 BB = sinkMBB;
12731 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12732 .addReg(MI.getOperand(3).getReg())
12733 .addMBB(copy0MBB)
12734 .addReg(MI.getOperand(2).getReg())
12735 .addMBB(thisMBB);
12736 } else if (MI.getOpcode() == PPC::ReadTB) {
12737 // To read the 64-bit time-base register on a 32-bit target, we read the
12738 // two halves. Should the counter have wrapped while it was being read, we
12739 // need to try again.
12740 // ...
12741 // readLoop:
12742 // mfspr Rx,TBU # load from TBU
12743 // mfspr Ry,TB # load from TB
12744 // mfspr Rz,TBU # load from TBU
12745 // cmpw crX,Rx,Rz # check if 'old'='new'
12746 // bne readLoop # branch if they're not equal
12747 // ...
12748
12749 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12750 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12751 DebugLoc dl = MI.getDebugLoc();
12752 F->insert(It, readMBB);
12753 F->insert(It, sinkMBB);
12754
12755 // Transfer the remainder of BB and its successor edges to sinkMBB.
12756 sinkMBB->splice(sinkMBB->begin(), BB,
12757 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12758 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12759
12760 BB->addSuccessor(readMBB);
12761 BB = readMBB;
12762
12763 MachineRegisterInfo &RegInfo = F->getRegInfo();
12764 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12765 Register LoReg = MI.getOperand(0).getReg();
12766 Register HiReg = MI.getOperand(1).getReg();
12767
12768 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12769 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12770 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12771
12772 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12773
12774 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12775 .addReg(HiReg)
12776 .addReg(ReadAgainReg);
12777 BuildMI(BB, dl, TII->get(PPC::BCC))
12778 .addImm(PPC::PRED_NE)
12779 .addReg(CmpReg)
12780 .addMBB(readMBB);
12781
12782 BB->addSuccessor(readMBB);
12783 BB->addSuccessor(sinkMBB);
12784 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12785 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12786 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12787 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12788 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12789 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12790 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12791 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12792
12793 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12794 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12795 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12796 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12797 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12798 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12799 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12800 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12801
12802 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12803 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12804 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12805 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12806 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12807 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12808 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12809 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12810
12811 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12812 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12813 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12814 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12815 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12816 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12817 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12818 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12819
12820 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12821 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12822 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12823 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12824 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12825 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12826 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12827 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12828
12829 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12830 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12831 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12832 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12833 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12834 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12835 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12836 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12837
12838 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12839 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
12840 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12841 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
12842 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12843 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
12844 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12845 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
12846
12847 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12848 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
12849 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12850 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
12851 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12852 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
12853 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12854 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
12855
12856 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12857 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
12858 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12859 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
12860 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12861 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
12862 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12863 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
12864
12865 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12866 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
12867 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12868 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
12869 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12870 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
12871 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12872 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
12873
12874 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12875 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12876 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12877 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12878 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12879 BB = EmitAtomicBinary(MI, BB, 4, 0);
12880 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12881 BB = EmitAtomicBinary(MI, BB, 8, 0);
12882 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12883 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12884 (Subtarget.hasPartwordAtomics() &&
12885 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12886 (Subtarget.hasPartwordAtomics() &&
12887 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12888 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12889
12890 auto LoadMnemonic = PPC::LDARX;
12891 auto StoreMnemonic = PPC::STDCX;
12892 switch (MI.getOpcode()) {
12893 default:
12894 llvm_unreachable("Compare and swap of unknown size");
12895 case PPC::ATOMIC_CMP_SWAP_I8:
12896 LoadMnemonic = PPC::LBARX;
12897 StoreMnemonic = PPC::STBCX;
12898 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12899 break;
12900 case PPC::ATOMIC_CMP_SWAP_I16:
12901 LoadMnemonic = PPC::LHARX;
12902 StoreMnemonic = PPC::STHCX;
12903 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12904 break;
12905 case PPC::ATOMIC_CMP_SWAP_I32:
12906 LoadMnemonic = PPC::LWARX;
12907 StoreMnemonic = PPC::STWCX;
12908 break;
12909 case PPC::ATOMIC_CMP_SWAP_I64:
12910 LoadMnemonic = PPC::LDARX;
12911 StoreMnemonic = PPC::STDCX;
12912 break;
12913 }
12914 MachineRegisterInfo &RegInfo = F->getRegInfo();
12915 Register dest = MI.getOperand(0).getReg();
12916 Register ptrA = MI.getOperand(1).getReg();
12917 Register ptrB = MI.getOperand(2).getReg();
12918 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12919 Register oldval = MI.getOperand(3).getReg();
12920 Register newval = MI.getOperand(4).getReg();
12921 DebugLoc dl = MI.getDebugLoc();
12922
12923 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12924 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12925 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12926 F->insert(It, loop1MBB);
12927 F->insert(It, loop2MBB);
12928 F->insert(It, exitMBB);
12929 exitMBB->splice(exitMBB->begin(), BB,
12930 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12931 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12932
12933 // thisMBB:
12934 // ...
12935 // fallthrough --> loopMBB
12936 BB->addSuccessor(loop1MBB);
12937
12938 // loop1MBB:
12939 // l[bhwd]arx dest, ptr
12940 // cmp[wd] dest, oldval
12941 // bne- exitBB
12942 // loop2MBB:
12943 // st[bhwd]cx. newval, ptr
12944 // bne- loopMBB
12945 // b exitBB
12946 // exitBB:
12947 BB = loop1MBB;
12948 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12949 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
12950 .addReg(dest)
12951 .addReg(oldval);
12952 BuildMI(BB, dl, TII->get(PPC::BCC))
12953 .addImm(PPC::PRED_NE)
12954 .addReg(CrReg)
12955 .addMBB(exitMBB);
12956 BB->addSuccessor(loop2MBB);
12957 BB->addSuccessor(exitMBB);
12958
12959 BB = loop2MBB;
12960 BuildMI(BB, dl, TII->get(StoreMnemonic))
12961 .addReg(newval)
12962 .addReg(ptrA)
12963 .addReg(ptrB);
12964 BuildMI(BB, dl, TII->get(PPC::BCC))
12965 .addImm(PPC::PRED_NE)
12966 .addReg(PPC::CR0)
12967 .addMBB(loop1MBB);
12968 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12969 BB->addSuccessor(loop1MBB);
12970 BB->addSuccessor(exitMBB);
12971
12972 // exitMBB:
12973 // ...
12974 BB = exitMBB;
12975 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12976 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12977 // We must use 64-bit registers for addresses when targeting 64-bit,
12978 // since we're actually doing arithmetic on them. Other registers
12979 // can be 32-bit.
12980 bool is64bit = Subtarget.isPPC64();
12981 bool isLittleEndian = Subtarget.isLittleEndian();
12982 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12983
12984 Register dest = MI.getOperand(0).getReg();
12985 Register ptrA = MI.getOperand(1).getReg();
12986 Register ptrB = MI.getOperand(2).getReg();
12987 Register oldval = MI.getOperand(3).getReg();
12988 Register newval = MI.getOperand(4).getReg();
12989 DebugLoc dl = MI.getDebugLoc();
12990
12991 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12992 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12993 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12994 F->insert(It, loop1MBB);
12995 F->insert(It, loop2MBB);
12996 F->insert(It, exitMBB);
12997 exitMBB->splice(exitMBB->begin(), BB,
12998 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12999 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13000
13001 MachineRegisterInfo &RegInfo = F->getRegInfo();
13002 const TargetRegisterClass *RC =
13003 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13004 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13005
13006 Register PtrReg = RegInfo.createVirtualRegister(RC);
13007 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13008 Register ShiftReg =
13009 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13010 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13011 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13012 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13013 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13014 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13015 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13016 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13017 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13018 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13019 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13020 Register Ptr1Reg;
13021 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13022 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13023 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13024 // thisMBB:
13025 // ...
13026 // fallthrough --> loopMBB
13027 BB->addSuccessor(loop1MBB);
13028
13029 // The 4-byte load must be aligned, while a char or short may be
13030 // anywhere in the word. Hence all this nasty bookkeeping code.
13031 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13032 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13033 // xori shift, shift1, 24 [16]
13034 // rlwinm ptr, ptr1, 0, 0, 29
13035 // slw newval2, newval, shift
13036 // slw oldval2, oldval, shift
13037 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13038 // slw mask, mask2, shift
13039 // and newval3, newval2, mask
13040 // and oldval3, oldval2, mask
13041 // loop1MBB:
13042 // lwarx tmpDest, ptr
13043 // and tmp, tmpDest, mask
13044 // cmpw tmp, oldval3
13045 // bne- exitBB
13046 // loop2MBB:
13047 // andc tmp2, tmpDest, mask
13048 // or tmp4, tmp2, newval3
13049 // stwcx. tmp4, ptr
13050 // bne- loop1MBB
13051 // b exitBB
13052 // exitBB:
13053 // srw dest, tmpDest, shift
13054 if (ptrA != ZeroReg) {
13055 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13056 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13057 .addReg(ptrA)
13058 .addReg(ptrB);
13059 } else {
13060 Ptr1Reg = ptrB;
13061 }
13062
13063 // We need to use a 32-bit subregister to avoid a register class mismatch in
13064 // 64-bit mode.
13065 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13066 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13067 .addImm(3)
13068 .addImm(27)
13069 .addImm(is8bit ? 28 : 27);
13070 if (!isLittleEndian)
13071 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13072 .addReg(Shift1Reg)
13073 .addImm(is8bit ? 24 : 16);
13074 if (is64bit)
13075 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13076 .addReg(Ptr1Reg)
13077 .addImm(0)
13078 .addImm(61);
13079 else
13080 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13081 .addReg(Ptr1Reg)
13082 .addImm(0)
13083 .addImm(0)
13084 .addImm(29);
13085 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13086 .addReg(newval)
13087 .addReg(ShiftReg);
13088 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13089 .addReg(oldval)
13090 .addReg(ShiftReg);
13091 if (is8bit)
13092 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13093 else {
13094 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13095 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13096 .addReg(Mask3Reg)
13097 .addImm(65535);
13098 }
13099 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13100 .addReg(Mask2Reg)
13101 .addReg(ShiftReg);
13102 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13103 .addReg(NewVal2Reg)
13104 .addReg(MaskReg);
13105 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13106 .addReg(OldVal2Reg)
13107 .addReg(MaskReg);
13108
13109 BB = loop1MBB;
13110 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13111 .addReg(ZeroReg)
13112 .addReg(PtrReg);
13113 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13114 .addReg(TmpDestReg)
13115 .addReg(MaskReg);
13116 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13117 .addReg(TmpReg)
13118 .addReg(OldVal3Reg);
13119 BuildMI(BB, dl, TII->get(PPC::BCC))
13120 .addImm(PPC::PRED_NE)
13121 .addReg(CrReg)
13122 .addMBB(exitMBB);
13123 BB->addSuccessor(loop2MBB);
13124 BB->addSuccessor(exitMBB);
13125
13126 BB = loop2MBB;
13127 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13128 .addReg(TmpDestReg)
13129 .addReg(MaskReg);
13130 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13131 .addReg(Tmp2Reg)
13132 .addReg(NewVal3Reg);
13133 BuildMI(BB, dl, TII->get(PPC::STWCX))
13134 .addReg(Tmp4Reg)
13135 .addReg(ZeroReg)
13136 .addReg(PtrReg);
13137 BuildMI(BB, dl, TII->get(PPC::BCC))
13138 .addImm(PPC::PRED_NE)
13139 .addReg(PPC::CR0)
13140 .addMBB(loop1MBB);
13141 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13142 BB->addSuccessor(loop1MBB);
13143 BB->addSuccessor(exitMBB);
13144
13145 // exitMBB:
13146 // ...
13147 BB = exitMBB;
13148 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13149 .addReg(TmpReg)
13150 .addReg(ShiftReg);
13151 } else if (MI.getOpcode() == PPC::FADDrtz) {
13152 // This pseudo performs an FADD with rounding mode temporarily forced
13153 // to round-to-zero. We emit this via custom inserter since the FPSCR
13154 // is not modeled at the SelectionDAG level.
13155 Register Dest = MI.getOperand(0).getReg();
13156 Register Src1 = MI.getOperand(1).getReg();
13157 Register Src2 = MI.getOperand(2).getReg();
13158 DebugLoc dl = MI.getDebugLoc();
13159
13160 MachineRegisterInfo &RegInfo = F->getRegInfo();
13161 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13162
13163 // Save FPSCR value.
13164 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13165
13166 // Set rounding mode to round-to-zero.
13167 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13168 .addImm(31)
13169 .addReg(PPC::RM, RegState::ImplicitDefine);
13170
13171 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13172 .addImm(30)
13173 .addReg(PPC::RM, RegState::ImplicitDefine);
13174
13175 // Perform addition.
13176 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13177 .addReg(Src1)
13178 .addReg(Src2);
13179 if (MI.getFlag(MachineInstr::NoFPExcept))
13180 MIB.setFlag(MachineInstr::NoFPExcept);
13181
13182 // Restore FPSCR value.
13183 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13184 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13185 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13186 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13187 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13188 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13189 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13190 ? PPC::ANDI8_rec
13191 : PPC::ANDI_rec;
13192 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13193 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13194
13195 MachineRegisterInfo &RegInfo = F->getRegInfo();
13196 Register Dest = RegInfo.createVirtualRegister(
13197 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13198
13199 DebugLoc Dl = MI.getDebugLoc();
13200 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13201 .addReg(MI.getOperand(1).getReg())
13202 .addImm(1);
13203 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13204 MI.getOperand(0).getReg())
13205 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13206 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13207 DebugLoc Dl = MI.getDebugLoc();
13208 MachineRegisterInfo &RegInfo = F->getRegInfo();
13209 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13210 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13211 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13212 MI.getOperand(0).getReg())
13213 .addReg(CRReg);
13214 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13215 DebugLoc Dl = MI.getDebugLoc();
13216 unsigned Imm = MI.getOperand(1).getImm();
13217 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13218 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13219 MI.getOperand(0).getReg())
13220 .addReg(PPC::CR0EQ);
13221 } else if (MI.getOpcode() == PPC::SETRNDi) {
13222 DebugLoc dl = MI.getDebugLoc();
13223 Register OldFPSCRReg = MI.getOperand(0).getReg();
13224
13225 // Save FPSCR value.
13226 if (MRI.use_empty(OldFPSCRReg))
13227 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13228 else
13229 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13230
13231 // The floating-point rounding mode is in bits 62:63 of the FPSCR and has
13232 // the following settings:
13233 // 00 Round to nearest
13234 // 01 Round to 0
13235 // 10 Round to +inf
13236 // 11 Round to -inf
13237
13238 // When the operand is an immediate, use its two least significant bits to
13239 // set bits 62:63 of the FPSCR.
13240 unsigned Mode = MI.getOperand(1).getImm();
13241 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13242 .addImm(31)
13243 .addReg(PPC::RM, RegState::ImplicitDefine);
13244
13245 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13246 .addImm(30)
13247 .addReg(PPC::RM, RegState::ImplicitDefine);
13248 } else if (MI.getOpcode() == PPC::SETRND) {
13249 DebugLoc dl = MI.getDebugLoc();
13250
13251 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13252 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13253 // If the target doesn't have DirectMove, we should use the stack to do the
13254 // conversion, because the target doesn't have instructions like mtvsrd
13255 // or mfvsrd to do it directly.
13256 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13257 if (Subtarget.hasDirectMove()) {
13258 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13259 .addReg(SrcReg);
13260 } else {
13261 // Use stack to do the register copy.
13262 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13263 MachineRegisterInfo &RegInfo = F->getRegInfo();
13264 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13265 if (RC == &PPC::F8RCRegClass) {
13266 // Copy register from F8RCRegClass to G8RCRegClass.
13267 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13268 "Unsupported RegClass.");
13269
13270 StoreOp = PPC::STFD;
13271 LoadOp = PPC::LD;
13272 } else {
13273 // Copy register from G8RCRegClass to F8RCRegClass.
13274 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13275 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13276 "Unsupported RegClass.");
13277 }
13278
13279 MachineFrameInfo &MFI = F->getFrameInfo();
13280 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13281
13282 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13283 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13284 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13285 MFI.getObjectAlign(FrameIdx));
13286
13287 // Store the SrcReg into the stack.
13288 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13289 .addReg(SrcReg)
13290 .addImm(0)
13291 .addFrameIndex(FrameIdx)
13292 .addMemOperand(MMOStore);
13293
13294 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13295 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13296 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13297 MFI.getObjectAlign(FrameIdx));
13298
13299 // Load from the stack where SrcReg is stored, and save to DestReg,
13300 // so we have done the RegClass conversion from RegClass::SrcReg to
13301 // RegClass::DestReg.
13302 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13303 .addImm(0)
13304 .addFrameIndex(FrameIdx)
13305 .addMemOperand(MMOLoad);
13306 }
13307 };
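// Roughly, for an F8RC -> G8RC copy without direct moves the lambda above
// emits the equivalent of:
//   stfd SrcReg, <slot>     (spill the FPR to a fresh 8-byte stack slot)
//   ld DestReg, <slot>      (reload the same bytes into the GPR)
// which is the classic store/reload idiom for cross-bank register copies.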
13308
13309 Register OldFPSCRReg = MI.getOperand(0).getReg();
13310
13311 // Save FPSCR value.
13312 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13313
13314 // When the operand is a GPR, use its two least significant bits together
13315 // with the mtfsf instruction to set bits 62:63 of FPSCR.
13316 //
13317 // copy OldFPSCRTmpReg, OldFPSCRReg
13318 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13319 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13320 // copy NewFPSCRReg, NewFPSCRTmpReg
13321 // mtfsf 255, NewFPSCRReg
13322 MachineOperand SrcOp = MI.getOperand(1);
13323 MachineRegisterInfo &RegInfo = F->getRegInfo();
13324 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13325
13326 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), OldFPSCRTmpReg)
13327 .addReg(OldFPSCRReg);
13328 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13329 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13330
13331 // The first operand of INSERT_SUBREG should be a register which has
13332 // subregisters; we only care about its RegClass, so we should use an
13333 // IMPLICIT_DEF register.
13334 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13335 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13336 .addReg(ImDefReg)
13337 .add(SrcOp)
13338 .addImm(1);
13339
13340 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13341 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13342 .addReg(OldFPSCRTmpReg)
13343 .addReg(ExtSrcReg)
13344 .addImm(0)
13345 .addImm(62);
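// With SH = 0 and MB = 62, RLDIMI rotates ExtSrcReg by zero and inserts it
// under a mask covering only bits 62:63, i.e. NewFPSCRTmpReg is
// OldFPSCRTmpReg with its two least significant bits replaced by those of
// SrcOp.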
13346
13347 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13348 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13349
13350 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into
13351 // bits 32:63 of FPSCR.
13352 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13353 .addImm(255)
13354 .addReg(NewFPSCRReg)
13355 .addImm(0)
13356 .addImm(0);
13357 } else if (MI.getOpcode() == PPC::SETFLM) {
13358 DebugLoc Dl = MI.getDebugLoc();
13359
13360 // Result of setflm is previous FPSCR content, so we need to save it first.
13361 Register OldFPSCRReg = MI.getOperand(0).getReg();
13362 if (MRI.use_empty(OldFPSCRReg))
13363 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13364 else
13365 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13366
13367 // Put bits 32:63 into FPSCR.
13368 Register NewFPSCRReg = MI.getOperand(1).getReg();
13369 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13370 .addImm(255)
13371 .addReg(NewFPSCRReg)
13372 .addImm(0)
13373 .addImm(0);
13374 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13375 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13376 return emitProbedAlloca(MI, BB);
13377 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13378 DebugLoc DL = MI.getDebugLoc();
13379 Register Src = MI.getOperand(2).getReg();
13380 Register Lo = MI.getOperand(0).getReg();
13381 Register Hi = MI.getOperand(1).getReg();
13382 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13383 .addDef(Lo)
13384 .addUse(Src, 0, PPC::sub_gp8_x1);
13385 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13386 .addDef(Hi)
13387 .addUse(Src, 0, PPC::sub_gp8_x0);
13388 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13389 MI.getOpcode() == PPC::STQX_PSEUDO) {
13390 DebugLoc DL = MI.getDebugLoc();
13391 // Ptr is used as the ptr_rc_no_r0 part
13392 // of LQ/STQ's memory operand and holds the result of adding RA and RB,
13393 // so it has to be g8rc_and_g8rc_nox0.
13394 Register Ptr =
13395 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13396 Register Val = MI.getOperand(0).getReg();
13397 Register RA = MI.getOperand(1).getReg();
13398 Register RB = MI.getOperand(2).getReg();
13399 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13400 BuildMI(*BB, MI, DL,
13401 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13402 : TII->get(PPC::STQ))
13403 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13404 .addImm(0)
13405 .addReg(Ptr);
13406 } else {
13407 llvm_unreachable("Unexpected instr type to insert");
13408 }
13409
13410 MI.eraseFromParent(); // The pseudo instruction is gone now.
13411 return BB;
13412}
13413
13414//===----------------------------------------------------------------------===//
13415// Target Optimization Hooks
13416//===----------------------------------------------------------------------===//
13417
13418static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13419 // For the estimates, convergence is quadratic, so we essentially double the
13420 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13421 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13422 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
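// For example, from 2^-5 successive steps give about 2^-10, 2^-20 and
// 2^-40, so three steps are needed to pass float's 2^-23 (two only reach
// 2^-20). With hasRecipPrec(), 2^-14 doubles to 2^-28 in one step, and
// double's 2^-52 needs two steps (2^-56), hence the extra step for f64.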
13423 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13424 if (VT.getScalarType() == MVT::f64)
13425 RefinementSteps++;
13426 return RefinementSteps;
13427}
13428
13429SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13430 const DenormalMode &Mode) const {
13431 // We only have VSX Vector Test for software Square Root.
13432 EVT VT = Op.getValueType();
13433 if (!isTypeLegal(MVT::i1) ||
13434 (VT != MVT::f64 &&
13435 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13436 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13437
13438 SDLoc DL(Op);
13439 // The output register of FTSQRT is a CR field.
13440 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13441 // ftsqrt BF,FRB
13442 // Let e_b be the unbiased exponent of the double-precision
13443 // floating-point operand in register FRB.
13444 // fe_flag is set to 1 if either of the following conditions occurs.
13445 // - The double-precision floating-point operand in register FRB is a zero,
13446 // a NaN, an infinity, or a negative value.
13447 // - e_b is less than or equal to -970.
13448 // Otherwise fe_flag is set to 0.
13449 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13450 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13451 // exponent is less than -970)
13452 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13453 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13454 FTSQRT, SRIdxVal),
13455 0);
13456}
13457
13458SDValue
13459PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13460 SelectionDAG &DAG) const {
13461 // We only have VSX Vector Square Root.
13462 EVT VT = Op.getValueType();
13463 if (VT != MVT::f64 &&
13464 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13465 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13466
13467 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13468}
13469
13470SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13471 int Enabled, int &RefinementSteps,
13472 bool &UseOneConstNR,
13473 bool Reciprocal) const {
13474 EVT VT = Operand.getValueType();
13475 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13476 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13477 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13478 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13479 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13480 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13481
13482 // The Newton-Raphson computation with a single constant does not provide
13483 // enough accuracy on some CPUs.
13484 UseOneConstNR = !Subtarget.needsTwoConstNR();
13485 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13486 }
13487 return SDValue();
13488}
13489
13490SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13491 int Enabled,
13492 int &RefinementSteps) const {
13493 EVT VT = Operand.getValueType();
13494 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13495 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13496 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13497 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13498 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13499 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13500 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13501 }
13502 return SDValue();
13503}
13504
13505unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13506 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13507 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13508 // enabled for division), this functionality is redundant with the default
13509 // combiner logic (once the division -> reciprocal/multiply transformation
13510 // has taken place). As a result, this matters more for older cores than for
13511 // newer ones.
13512
13513 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13514 // reciprocal if there are two or more FDIVs (for embedded cores with only
13515 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
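// For example, when the returned minimum (two, for the embedded cores
// below) is met, the combiner rewrites a/d and b/d as r = 1.0/d; a*r; b*r,
// trading the repeated divisions for multiplies.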
13516 switch (Subtarget.getCPUDirective()) {
13517 default:
13518 return 3;
13519 case PPC::DIR_440:
13520 case PPC::DIR_A2:
13521 case PPC::DIR_E500:
13522 case PPC::DIR_E500mc:
13523 case PPC::DIR_E5500:
13524 return 2;
13525 }
13526}
13527
13528// isConsecutiveLSLoc needs to work even if all adds have not yet been
13529// collapsed, and so we need to look through chains of them.
13530 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13531 int64_t& Offset, SelectionDAG &DAG) {
13532 if (DAG.isBaseWithConstantOffset(Loc)) {
13533 Base = Loc.getOperand(0);
13534 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13535
13536 // The base might itself be a base plus an offset, and if so, accumulate
13537 // that as well.
13538 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13539 }
13540}
13541
13542 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13543 unsigned Bytes, int Dist,
13544 SelectionDAG &DAG) {
13545 if (VT.getSizeInBits() / 8 != Bytes)
13546 return false;
13547
13548 SDValue BaseLoc = Base->getBasePtr();
13549 if (Loc.getOpcode() == ISD::FrameIndex) {
13550 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13551 return false;
13552 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13553 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13554 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13555 int FS = MFI.getObjectSize(FI);
13556 int BFS = MFI.getObjectSize(BFI);
13557 if (FS != BFS || FS != (int)Bytes) return false;
13558 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13559 }
13560
13561 SDValue Base1 = Loc, Base2 = BaseLoc;
13562 int64_t Offset1 = 0, Offset2 = 0;
13563 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13564 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13565 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13566 return true;
13567
13568 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13569 const GlobalValue *GV1 = nullptr;
13570 const GlobalValue *GV2 = nullptr;
13571 Offset1 = 0;
13572 Offset2 = 0;
13573 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13574 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13575 if (isGA1 && isGA2 && GV1 == GV2)
13576 return Offset1 == (Offset2 + Dist*Bytes);
13577 return false;
13578}
13579
13580// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13581// not enforce equality of the chain operands.
13582 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13583 unsigned Bytes, int Dist,
13584 SelectionDAG &DAG) {
13585 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13586 EVT VT = LS->getMemoryVT();
13587 SDValue Loc = LS->getBasePtr();
13588 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13589 }
13590
13591 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13592 EVT VT;
13593 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13594 default: return false;
13595 case Intrinsic::ppc_altivec_lvx:
13596 case Intrinsic::ppc_altivec_lvxl:
13597 case Intrinsic::ppc_vsx_lxvw4x:
13598 case Intrinsic::ppc_vsx_lxvw4x_be:
13599 VT = MVT::v4i32;
13600 break;
13601 case Intrinsic::ppc_vsx_lxvd2x:
13602 case Intrinsic::ppc_vsx_lxvd2x_be:
13603 VT = MVT::v2f64;
13604 break;
13605 case Intrinsic::ppc_altivec_lvebx:
13606 VT = MVT::i8;
13607 break;
13608 case Intrinsic::ppc_altivec_lvehx:
13609 VT = MVT::i16;
13610 break;
13611 case Intrinsic::ppc_altivec_lvewx:
13612 VT = MVT::i32;
13613 break;
13614 }
13615
13616 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13617 }
13618
13619 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13620 EVT VT;
13621 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13622 default: return false;
13623 case Intrinsic::ppc_altivec_stvx:
13624 case Intrinsic::ppc_altivec_stvxl:
13625 case Intrinsic::ppc_vsx_stxvw4x:
13626 VT = MVT::v4i32;
13627 break;
13628 case Intrinsic::ppc_vsx_stxvd2x:
13629 VT = MVT::v2f64;
13630 break;
13631 case Intrinsic::ppc_vsx_stxvw4x_be:
13632 VT = MVT::v4i32;
13633 break;
13634 case Intrinsic::ppc_vsx_stxvd2x_be:
13635 VT = MVT::v2f64;
13636 break;
13637 case Intrinsic::ppc_altivec_stvebx:
13638 VT = MVT::i8;
13639 break;
13640 case Intrinsic::ppc_altivec_stvehx:
13641 VT = MVT::i16;
13642 break;
13643 case Intrinsic::ppc_altivec_stvewx:
13644 VT = MVT::i32;
13645 break;
13646 }
13647
13648 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13649 }
13650
13651 return false;
13652}
13653
13654 // Return true if there is a nearby consecutive load to the one provided
13655 // (regardless of alignment). We search up and down the chain, looking through
13656// token factors and other loads (but nothing else). As a result, a true result
13657// indicates that it is safe to create a new consecutive load adjacent to the
13658// load provided.
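// For example, if LD and another same-width load hang off a common token
// factor (possibly through intermediate chained loads), the upward walk
// reaches that token factor and the downward walk over its users finds the
// neighboring load.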
13659 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13660 SDValue Chain = LD->getChain();
13661 EVT VT = LD->getMemoryVT();
13662
13663 SmallSet<SDNode *, 16> LoadRoots;
13664 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13665 SmallSet<SDNode *, 16> Visited;
13666
13667 // First, search up the chain, branching to follow all token-factor operands.
13668 // If we find a consecutive load, then we're done, otherwise, record all
13669 // nodes just above the top-level loads and token factors.
13670 while (!Queue.empty()) {
13671 SDNode *ChainNext = Queue.pop_back_val();
13672 if (!Visited.insert(ChainNext).second)
13673 continue;
13674
13675 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13676 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13677 return true;
13678
13679 if (!Visited.count(ChainLD->getChain().getNode()))
13680 Queue.push_back(ChainLD->getChain().getNode());
13681 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13682 for (const SDUse &O : ChainNext->ops())
13683 if (!Visited.count(O.getNode()))
13684 Queue.push_back(O.getNode());
13685 } else
13686 LoadRoots.insert(ChainNext);
13687 }
13688
13689 // Second, search down the chain, starting from the top-level nodes recorded
13690 // in the first phase. These top-level nodes are the nodes just above all
13691 // loads and token factors. Starting with their uses, recursively look through
13692 // all loads (just the chain uses) and token factors to find a consecutive
13693 // load.
13694 Visited.clear();
13695 Queue.clear();
13696
13697 for (SDNode *I : LoadRoots) {
13698 Queue.push_back(I);
13699
13700 while (!Queue.empty()) {
13701 SDNode *LoadRoot = Queue.pop_back_val();
13702 if (!Visited.insert(LoadRoot).second)
13703 continue;
13704
13705 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13706 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13707 return true;
13708
13709 for (SDNode *U : LoadRoot->uses())
13710 if (((isa<MemSDNode>(U) &&
13711 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13712 U->getOpcode() == ISD::TokenFactor) &&
13713 !Visited.count(U))
13714 Queue.push_back(U);
13715 }
13716 }
13717
13718 return false;
13719}
13720
13721/// This function is called when we have proved that a SETCC node can be replaced
13722/// by subtraction (and other supporting instructions) so that the result of
13723/// comparison is kept in a GPR instead of CR. This function is purely for
13724/// codegen purposes and has some flags to guide the codegen process.
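/// For example, for an i32 unsigned 'x < y' both operands are zero extended
/// to i64 and x - y is computed; the difference is negative exactly when
/// x < y, so the sign bit shifted down by Size - 1 is the comparison result,
/// complemented for the <= and >= forms.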
13725 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13726 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13727 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13728
13729 // Zero extend the operands to the largest legal integer. Originally, they
13730 // must be of a strictly smaller size.
13731 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13732 DAG.getConstant(Size, DL, MVT::i32));
13733 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13734 DAG.getConstant(Size, DL, MVT::i32));
13735
13736 // Swap if needed. Depends on the condition code.
13737 if (Swap)
13738 std::swap(Op0, Op1);
13739
13740 // Subtract extended integers.
13741 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13742
13743 // Move the sign bit to the least significant position and zero out the rest.
13744 // Now the least significant bit carries the result of original comparison.
13745 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13746 DAG.getConstant(Size - 1, DL, MVT::i32));
13747 auto Final = Shifted;
13748
13749 // Complement the result if needed. Based on the condition code.
13750 if (Complement)
13751 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13752 DAG.getConstant(1, DL, MVT::i64));
13753
13754 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13755}
13756
13757SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13758 DAGCombinerInfo &DCI) const {
13759 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13760
13761 SelectionDAG &DAG = DCI.DAG;
13762 SDLoc DL(N);
13763
13764 // Size of integers being compared has a critical role in the following
13765 // analysis, so we prefer to do this when all types are legal.
13766 if (!DCI.isAfterLegalizeDAG())
13767 return SDValue();
13768
13769 // If all users of SETCC extend its value to a legal integer type
13770 // then we replace SETCC with a subtraction
13771 for (const SDNode *U : N->uses())
13772 if (U->getOpcode() != ISD::ZERO_EXTEND)
13773 return SDValue();
13774
13775 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13776 auto OpSize = N->getOperand(0).getValueSizeInBits();
13777
13778 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13779
13780 if (OpSize < Size) {
13781 switch (CC) {
13782 default: break;
13783 case ISD::SETULT:
13784 return generateEquivalentSub(N, Size, false, false, DL, DAG);
13785 case ISD::SETULE:
13786 return generateEquivalentSub(N, Size, true, true, DL, DAG);
13787 case ISD::SETUGT:
13788 return generateEquivalentSub(N, Size, false, true, DL, DAG);
13789 case ISD::SETUGE:
13790 return generateEquivalentSub(N, Size, true, false, DL, DAG);
13791 }
13792 }
13793
13794 return SDValue();
13795}
13796
13797SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13798 DAGCombinerInfo &DCI) const {
13799 SelectionDAG &DAG = DCI.DAG;
13800 SDLoc dl(N);
13801
13802 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13803 // If we're tracking CR bits, we need to be careful that we don't have:
13804 // trunc(binary-ops(zext(x), zext(y)))
13805 // or
13806 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13807 // such that we're unnecessarily moving things into GPRs when it would be
13808 // better to keep them in CR bits.
13809
13810 // Note that trunc here can be an actual i1 trunc, or can be the effective
13811 // truncation that comes from a setcc or select_cc.
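// For example, (i1 (trunc (or (zext i1 %a to i32), (zext i1 %b to i32))))
// can be computed as (or %a, %b) directly in a CR bit, with no GPR round
// trip, once the extensions and the truncation are peeled away.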
13812 if (N->getOpcode() == ISD::TRUNCATE &&
13813 N->getValueType(0) != MVT::i1)
13814 return SDValue();
13815
13816 if (N->getOperand(0).getValueType() != MVT::i32 &&
13817 N->getOperand(0).getValueType() != MVT::i64)
13818 return SDValue();
13819
13820 if (N->getOpcode() == ISD::SETCC ||
13821 N->getOpcode() == ISD::SELECT_CC) {
13822 // If we're looking at a comparison, then we need to make sure that the
13823 // high bits (all except for the first) don't affect the result.
13824 ISD::CondCode CC =
13825 cast<CondCodeSDNode>(N->getOperand(
13826 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13827 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13828
13829 if (ISD::isSignedIntSetCC(CC)) {
13830 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13831 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13832 return SDValue();
13833 } else if (ISD::isUnsignedIntSetCC(CC)) {
13834 if (!DAG.MaskedValueIsZero(N->getOperand(0),
13835 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13836 !DAG.MaskedValueIsZero(N->getOperand(1),
13837 APInt::getHighBitsSet(OpBits, OpBits-1)))
13838 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13839 : SDValue());
13840 } else {
13841 // This is neither a signed nor an unsigned comparison, just make sure
13842 // that the high bits are equal.
13843 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13844 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13845
13846 // We don't really care about what is known about the first bit (if
13847 // anything), so pretend that it is known zero for both to ensure they can
13848 // be compared as constants.
13849 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
13850 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
13851
13852 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
13853 Op1Known.getConstant() != Op2Known.getConstant())
13854 return SDValue();
13855 }
13856 }
13857
13858 // We now know that the higher-order bits are irrelevant, we just need to
13859 // make sure that all of the intermediate operations are bit operations, and
13860 // all inputs are extensions.
13861 if (N->getOperand(0).getOpcode() != ISD::AND &&
13862 N->getOperand(0).getOpcode() != ISD::OR &&
13863 N->getOperand(0).getOpcode() != ISD::XOR &&
13864 N->getOperand(0).getOpcode() != ISD::SELECT &&
13865 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13866 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13867 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13868 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13869 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13870 return SDValue();
13871
13872 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13873 N->getOperand(1).getOpcode() != ISD::AND &&
13874 N->getOperand(1).getOpcode() != ISD::OR &&
13875 N->getOperand(1).getOpcode() != ISD::XOR &&
13876 N->getOperand(1).getOpcode() != ISD::SELECT &&
13877 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13878 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13879 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13880 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13881 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13882 return SDValue();
13883
13883
13884 SmallVector<SDValue, 4> Inputs;
13885 SmallVector<SDValue, 8> BinOps, PromOps;
13886 SmallPtrSet<SDNode *, 16> Visited;
13887
13888 for (unsigned i = 0; i < 2; ++i) {
13889 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13890 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13891 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13892 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13893 isa<ConstantSDNode>(N->getOperand(i)))
13894 Inputs.push_back(N->getOperand(i));
13895 else
13896 BinOps.push_back(N->getOperand(i));
13897
13898 if (N->getOpcode() == ISD::TRUNCATE)
13899 break;
13900 }
13901
13902 // Visit all inputs, collect all binary operations (and, or, xor and
13903 // select) that are all fed by extensions.
13904 while (!BinOps.empty()) {
13905 SDValue BinOp = BinOps.pop_back_val();
13906
13907 if (!Visited.insert(BinOp.getNode()).second)
13908 continue;
13909
13910 PromOps.push_back(BinOp);
13911
13912 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13913 // The condition of the select is not promoted.
13914 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13915 continue;
13916 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13917 continue;
13918
13919 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13920 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13921 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13922 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13923 isa<ConstantSDNode>(BinOp.getOperand(i))) {
13924 Inputs.push_back(BinOp.getOperand(i));
13925 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13926 BinOp.getOperand(i).getOpcode() == ISD::OR ||
13927 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13928 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13929 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13930 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13931 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13932 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13933 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13934 BinOps.push_back(BinOp.getOperand(i));
13935 } else {
13936 // We have an input that is not an extension or another binary
13937 // operation; we'll abort this transformation.
13938 return SDValue();
13939 }
13940 }
13941 }
13942
13943 // Make sure that this is a self-contained cluster of operations (which
13944 // is not quite the same thing as saying that everything has only one
13945 // use).
13946 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13947 if (isa<ConstantSDNode>(Inputs[i]))
13948 continue;
13949
13950 for (const SDNode *User : Inputs[i].getNode()->uses()) {
13951 if (User != N && !Visited.count(User))
13952 return SDValue();
13953
13954 // Make sure that we're not going to promote the non-output-value
13955 // operand(s) or SELECT or SELECT_CC.
13956 // FIXME: Although we could sometimes handle this, and it does occur in
13957 // practice that one of the condition inputs to the select is also one of
13958 // the outputs, we currently can't deal with this.
13959 if (User->getOpcode() == ISD::SELECT) {
13960 if (User->getOperand(0) == Inputs[i])
13961 return SDValue();
13962 } else if (User->getOpcode() == ISD::SELECT_CC) {
13963 if (User->getOperand(0) == Inputs[i] ||
13964 User->getOperand(1) == Inputs[i])
13965 return SDValue();
13966 }
13967 }
13968 }
13969
13970 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13971 for (const SDNode *User : PromOps[i].getNode()->uses()) {
13972 if (User != N && !Visited.count(User))
13973 return SDValue();
13974
13975 // Make sure that we're not going to promote the non-output-value
13976 // operand(s) or SELECT or SELECT_CC.
13977 // FIXME: Although we could sometimes handle this, and it does occur in
13978 // practice that one of the condition inputs to the select is also one of
13979 // the outputs, we currently can't deal with this.
13980 if (User->getOpcode() == ISD::SELECT) {
13981 if (User->getOperand(0) == PromOps[i])
13982 return SDValue();
13983 } else if (User->getOpcode() == ISD::SELECT_CC) {
13984 if (User->getOperand(0) == PromOps[i] ||
13985 User->getOperand(1) == PromOps[i])
13986 return SDValue();
13987 }
13988 }
13989 }
13990
13991 // Replace all inputs with the extension operand.
13992 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13993 // Constants may have users outside the cluster of to-be-promoted nodes,
13994 // and so we need to replace those as we do the promotions.
13995 if (isa<ConstantSDNode>(Inputs[i]))
13996 continue;
13997 else
13998 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13999 }
14000
14001 std::list<HandleSDNode> PromOpHandles;
14002 for (auto &PromOp : PromOps)
14003 PromOpHandles.emplace_back(PromOp);
14004
14005 // Replace all operations (these are all the same, but have a different
14006 // (i1) return type). DAG.getNode will validate that the types of
14007 // a binary operator match, so go through the list in reverse so that
14008 // we've likely promoted both operands first. Any intermediate truncations or
14009 // extensions disappear.
14010 while (!PromOpHandles.empty()) {
14011 SDValue PromOp = PromOpHandles.back().getValue();
14012 PromOpHandles.pop_back();
14013
14014 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14015 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14016 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14017 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14018 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14019 PromOp.getOperand(0).getValueType() != MVT::i1) {
14020 // The operand is not yet ready (see comment below).
14021 PromOpHandles.emplace_front(PromOp);
14022 continue;
14023 }
14024
14025 SDValue RepValue = PromOp.getOperand(0);
14026 if (isa<ConstantSDNode>(RepValue))
14027 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14028
14029 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14030 continue;
14031 }
14032
14033 unsigned C;
14034 switch (PromOp.getOpcode()) {
14035 default: C = 0; break;
14036 case ISD::SELECT: C = 1; break;
14037 case ISD::SELECT_CC: C = 2; break;
14038 }
14039
14040 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14041 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14042 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14043 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14044 // The to-be-promoted operands of this node have not yet been
14045 // promoted (this should be rare because we're going through the
14046 // list backward, but if one of the operands has several users in
14047 // this cluster of to-be-promoted nodes, it is possible).
14048 PromOpHandles.emplace_front(PromOp);
14049 continue;
14050 }
14051
14052 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14053 PromOp.getNode()->op_end());
14054
14055 // If there are any constant inputs, make sure they're replaced now.
14056 for (unsigned i = 0; i < 2; ++i)
14057 if (isa<ConstantSDNode>(Ops[C+i]))
14058 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14059
14060 DAG.ReplaceAllUsesOfValueWith(PromOp,
14061 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14062 }
14063
14064 // Now we're left with the initial truncation itself.
14065 if (N->getOpcode() == ISD::TRUNCATE)
14066 return N->getOperand(0);
14067
14068 // Otherwise, this is a comparison. The operands to be compared have just
14069 // changed type (to i1), but everything else is the same.
14070 return SDValue(N, 0);
14071}
14072
14073SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14074 DAGCombinerInfo &DCI) const {
14075 SelectionDAG &DAG = DCI.DAG;
14076 SDLoc dl(N);
14077
14078 // If we're tracking CR bits, we need to be careful that we don't have:
14079 // zext(binary-ops(trunc(x), trunc(y)))
14080 // or
14081 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14082 // such that we're unnecessarily moving things into CR bits that can more
14083 // efficiently stay in GPRs. Note that if we're not certain that the high
14084 // bits are set as required by the final extension, we still may need to do
14085 // some masking to get the proper behavior.
14086
14087 // This same functionality is important on PPC64 when dealing with
14088 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14089 // the return values of functions. Because it is so similar, it is handled
14090 // here as well.
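// For example, (i64 (zext (and (trunc i64 %a to i32), (trunc i64 %b to i32))))
// can be rewritten as an AND of %a and %b done directly in 64-bit registers,
// with a final masking only if the high bits are not already known zero.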
14091
14092 if (N->getValueType(0) != MVT::i32 &&
14093 N->getValueType(0) != MVT::i64)
14094 return SDValue();
14095
14096 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14097 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14098 return SDValue();
14099
14100 if (N->getOperand(0).getOpcode() != ISD::AND &&
14101 N->getOperand(0).getOpcode() != ISD::OR &&
14102 N->getOperand(0).getOpcode() != ISD::XOR &&
14103 N->getOperand(0).getOpcode() != ISD::SELECT &&
14104 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14105 return SDValue();
14106
14107 SmallVector<SDValue, 4> Inputs;
14108 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14109 SmallPtrSet<SDNode *, 16> Visited;
14110
14111 // Visit all inputs, collect all binary operations (and, or, xor and
14112 // select) that are all fed by truncations.
14113 while (!BinOps.empty()) {
14114 SDValue BinOp = BinOps.pop_back_val();
14115
14116 if (!Visited.insert(BinOp.getNode()).second)
14117 continue;
14118
14119 PromOps.push_back(BinOp);
14120
14121 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14122 // The condition of the select is not promoted.
14123 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14124 continue;
14125 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14126 continue;
14127
14128 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14129 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14130 Inputs.push_back(BinOp.getOperand(i));
14131 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14132 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14133 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14134 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14135 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14136 BinOps.push_back(BinOp.getOperand(i));
14137 } else {
14138 // We have an input that is not a truncation or another binary
14139 // operation; we'll abort this transformation.
14140 return SDValue();
14141 }
14142 }
14143 }
14144
14145 // The operands of a select that must be truncated when the select is
14146 // promoted because the operand is actually part of the to-be-promoted set.
14147 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14148
14149 // Make sure that this is a self-contained cluster of operations (which
14150 // is not quite the same thing as saying that everything has only one
14151 // use).
14152 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14153 if (isa<ConstantSDNode>(Inputs[i]))
14154 continue;
14155
14156 for (SDNode *User : Inputs[i].getNode()->uses()) {
14157 if (User != N && !Visited.count(User))
14158 return SDValue();
14159
14160 // If we're going to promote the non-output-value operand(s) or SELECT or
14161 // SELECT_CC, record them for truncation.
14162 if (User->getOpcode() == ISD::SELECT) {
14163 if (User->getOperand(0) == Inputs[i])
14164 SelectTruncOp[0].insert(std::make_pair(User,
14165 User->getOperand(0).getValueType()));
14166 } else if (User->getOpcode() == ISD::SELECT_CC) {
14167 if (User->getOperand(0) == Inputs[i])
14168 SelectTruncOp[0].insert(std::make_pair(User,
14169 User->getOperand(0).getValueType()));
14170 if (User->getOperand(1) == Inputs[i])
14171 SelectTruncOp[1].insert(std::make_pair(User,
14172 User->getOperand(1).getValueType()));
14173 }
14174 }
14175 }
14176
14177 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14178 for (SDNode *User : PromOps[i].getNode()->uses()) {
14179 if (User != N && !Visited.count(User))
14180 return SDValue();
14181
14182 // If we're going to promote the non-output-value operand(s) or SELECT or
14183 // SELECT_CC, record them for truncation.
14184 if (User->getOpcode() == ISD::SELECT) {
14185 if (User->getOperand(0) == PromOps[i])
14186 SelectTruncOp[0].insert(std::make_pair(User,
14187 User->getOperand(0).getValueType()));
14188 } else if (User->getOpcode() == ISD::SELECT_CC) {
14189 if (User->getOperand(0) == PromOps[i])
14190 SelectTruncOp[0].insert(std::make_pair(User,
14191 User->getOperand(0).getValueType()));
14192 if (User->getOperand(1) == PromOps[i])
14193 SelectTruncOp[1].insert(std::make_pair(User,
14194 User->getOperand(1).getValueType()));
14195 }
14196 }
14197 }
14198
14199 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14200 bool ReallyNeedsExt = false;
14201 if (N->getOpcode() != ISD::ANY_EXTEND) {
14202 // If all of the inputs are not already sign/zero extended, then
14203 // we'll still need to do that at the end.
14204 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14205 if (isa<ConstantSDNode>(Inputs[i]))
14206 continue;
14207
14208 unsigned OpBits =
14209 Inputs[i].getOperand(0).getValueSizeInBits();
14210 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14211
14212 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14213 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14214 APInt::getHighBitsSet(OpBits,
14215 OpBits-PromBits))) ||
14216 (N->getOpcode() == ISD::SIGN_EXTEND &&
14217 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14218 (OpBits-(PromBits-1)))) {
14219 ReallyNeedsExt = true;
14220 break;
14221 }
14222 }
14223 }
14224
14225 // Replace all inputs, either with the truncation operand, or a
14226 // truncation or extension to the final output type.
14227 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14228 // Constant inputs need to be replaced with the to-be-promoted nodes that
14229 // use them because they might have users outside of the cluster of
14230 // promoted nodes.
14231 if (isa<ConstantSDNode>(Inputs[i]))
14232 continue;
14233
14234 SDValue InSrc = Inputs[i].getOperand(0);
14235 if (Inputs[i].getValueType() == N->getValueType(0))
14236 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14237 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14238 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14239 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14240 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14241 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14242 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14243 else
14244 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14245 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14246 }
14247
14248 std::list<HandleSDNode> PromOpHandles;
14249 for (auto &PromOp : PromOps)
14250 PromOpHandles.emplace_back(PromOp);
14251
14252 // Replace all operations (these are all the same, but have a different
14253 // (promoted) return type). DAG.getNode will validate that the types of
14254 // a binary operator match, so go through the list in reverse so that
14255 // we've likely promoted both operands first.
14256 while (!PromOpHandles.empty()) {
14257 SDValue PromOp = PromOpHandles.back().getValue();
14258 PromOpHandles.pop_back();
14259
14260 unsigned C;
14261 switch (PromOp.getOpcode()) {
14262 default: C = 0; break;
14263 case ISD::SELECT: C = 1; break;
14264 case ISD::SELECT_CC: C = 2; break;
14265 }
14266
14267 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14268 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14269 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14270 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14271 // The to-be-promoted operands of this node have not yet been
14272 // promoted (this should be rare because we're going through the
14273 // list backward, but if one of the operands has several users in
14274 // this cluster of to-be-promoted nodes, it is possible).
14275 PromOpHandles.emplace_front(PromOp);
14276 continue;
14277 }
14278
14279 // For SELECT and SELECT_CC nodes, we do a similar check for any
14280 // to-be-promoted comparison inputs.
14281 if (PromOp.getOpcode() == ISD::SELECT ||
14282 PromOp.getOpcode() == ISD::SELECT_CC) {
14283 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14284 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14285 (SelectTruncOp[1].count(PromOp.getNode()) &&
14286 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14287 PromOpHandles.emplace_front(PromOp);
14288 continue;
14289 }
14290 }
14291
14292 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14293 PromOp.getNode()->op_end());
14294
14295 // If this node has constant inputs, then they'll need to be promoted here.
14296 for (unsigned i = 0; i < 2; ++i) {
14297 if (!isa<ConstantSDNode>(Ops[C+i]))
14298 continue;
14299 if (Ops[C+i].getValueType() == N->getValueType(0))
14300 continue;
14301
14302 if (N->getOpcode() == ISD::SIGN_EXTEND)
14303 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14304 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14305 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14306 else
14307 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14308 }
14309
14310 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14311 // truncate them again to the original value type.
14312 if (PromOp.getOpcode() == ISD::SELECT ||
14313 PromOp.getOpcode() == ISD::SELECT_CC) {
14314 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14315 if (SI0 != SelectTruncOp[0].end())
14316 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14317 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14318 if (SI1 != SelectTruncOp[1].end())
14319 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14320 }
14321
14322 DAG.ReplaceAllUsesOfValueWith(PromOp,
14323 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14324 }
14325
14326 // Now we're left with the initial extension itself.
14327 if (!ReallyNeedsExt)
14328 return N->getOperand(0);
14329
14330 // To zero extend, just mask off everything except for the first bit (in the
14331 // i1 case).
14332 if (N->getOpcode() == ISD::ZERO_EXTEND)
14333 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14334 DAG.getConstant(APInt::getLowBitsSet(
14335 N->getValueSizeInBits(0), PromBits),
14336 dl, N->getValueType(0)));
14337
14338 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14339 "Invalid extension type");
14340 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14341 SDValue ShiftCst =
14342 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14343 return DAG.getNode(
14344 ISD::SRA, dl, N->getValueType(0),
14345 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14346 ShiftCst);
14347}
14348
14349SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14350 DAGCombinerInfo &DCI) const {
14351 assert(N->getOpcode() == ISD::SETCC &&
14352 "Should be called with a SETCC node");
14353
14354 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14355 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14356 SDValue LHS = N->getOperand(0);
14357 SDValue RHS = N->getOperand(1);
14358
14359 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14360 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14361 LHS.hasOneUse())
14362 std::swap(LHS, RHS);
14363
14364 // x == 0-y --> x+y == 0
14365 // x != 0-y --> x+y != 0
14366 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14367 RHS.hasOneUse()) {
14368 SDLoc DL(N);
14369 SelectionDAG &DAG = DCI.DAG;
14370 EVT VT = N->getValueType(0);
14371 EVT OpVT = LHS.getValueType();
14372 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14373 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14374 }
14375 }
14376
14377 return DAGCombineTruncBoolExt(N, DCI);
14378}
14379
14380// Is this an extending load from an f32 to an f64?
14381static bool isFPExtLoad(SDValue Op) {
14382 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14383 return LD->getExtensionType() == ISD::EXTLOAD &&
14384 Op.getValueType() == MVT::f64;
14385 return false;
14386}
14387
14388 /// Reduces the number of fp-to-int conversions when building a vector.
14389///
14390/// If this vector is built out of floating to integer conversions,
14391/// transform it to a vector built out of floating point values followed by a
14392/// single floating to integer conversion of the vector.
14393/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14394/// becomes (fptosi (build_vector ($A, $B, ...)))
14395SDValue PPCTargetLowering::
14396combineElementTruncationToVectorTruncation(SDNode *N,
14397 DAGCombinerInfo &DCI) const {
14398 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14399 "Should be called with a BUILD_VECTOR node");
14400
14401 SelectionDAG &DAG = DCI.DAG;
14402 SDLoc dl(N);
14403
14404 SDValue FirstInput = N->getOperand(0);
14405 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14406 "The input operand must be an fp-to-int conversion.");
14407
14408 // This combine happens after legalization so the fp_to_[su]i nodes are
14409 // already converted to PPCSISD nodes.
14410 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14411 if (FirstConversion == PPCISD::FCTIDZ ||
14412 FirstConversion == PPCISD::FCTIDUZ ||
14413 FirstConversion == PPCISD::FCTIWZ ||
14414 FirstConversion == PPCISD::FCTIWUZ) {
14415 bool IsSplat = true;
14416 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14417 FirstConversion == PPCISD::FCTIWUZ;
14418 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14419 SmallVector<SDValue, 4> Ops;
14420 EVT TargetVT = N->getValueType(0);
14421 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14422 SDValue NextOp = N->getOperand(i);
14423 if (NextOp.getOpcode() != PPCISD::MFVSR)
14424 return SDValue();
14425 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14426 if (NextConversion != FirstConversion)
14427 return SDValue();
14428 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14429 // This is not valid if the input was originally double precision. It is
14430 // also not profitable to do unless this is an extending load, in which
14431 // case doing this combine will allow us to combine consecutive loads.
14432 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14433 return SDValue();
14434 if (N->getOperand(i) != FirstInput)
14435 IsSplat = false;
14436 }
14437
14438 // If this is a splat, we leave it as-is since there will be only a single
14439 // fp-to-int conversion followed by a splat of the integer. This is better
14440 // for 32-bit and smaller ints and neutral for 64-bit ints.
14441 if (IsSplat)
14442 return SDValue();
14443
14444 // Now that we know we have the right type of node, get its operands
14445 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14446 SDValue In = N->getOperand(i).getOperand(0);
14447 if (Is32Bit) {
14448 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14449 // here, we know that all inputs are extending loads so this is safe).
14450 if (In.isUndef())
14451 Ops.push_back(DAG.getUNDEF(SrcVT));
14452 else {
14453 SDValue Trunc =
14454 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14455 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14456 Ops.push_back(Trunc);
14457 }
14458 } else
14459 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14460 }
14461
14462 unsigned Opcode;
14463 if (FirstConversion == PPCISD::FCTIDZ ||
14464 FirstConversion == PPCISD::FCTIWZ)
14465 Opcode = ISD::FP_TO_SINT;
14466 else
14467 Opcode = ISD::FP_TO_UINT;
14468
14469 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14470 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14471 return DAG.getNode(Opcode, dl, TargetVT, BV);
14472 }
14473 return SDValue();
14474}
14475
14476/// Reduce the number of loads when building a vector.
14477///
14478/// Building a vector out of multiple loads can be converted to a load
14479/// of the vector type if the loads are consecutive. If the loads are
14480/// consecutive but in descending order, a shuffle is added at the end
14481/// to reorder the vector.
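/// For example, four f32 loads from A, A+4, A+8 and A+12 become one v4f32
/// load from A; the same loads in descending address order become a v4f32
/// load from the lowest address followed by a <3,2,1,0> shuffle.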
14482 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14483 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14484 "Should be called with a BUILD_VECTOR node");
14485
14486 SDLoc dl(N);
14487
14488 // Return early for non-byte-sized types, as they can't be consecutive.
14489 if (!N->getValueType(0).getVectorElementType().isByteSized())
14490 return SDValue();
14491
14492 bool InputsAreConsecutiveLoads = true;
14493 bool InputsAreReverseConsecutive = true;
14494 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14495 SDValue FirstInput = N->getOperand(0);
14496 bool IsRoundOfExtLoad = false;
14497 LoadSDNode *FirstLoad = nullptr;
14498
14499 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14500 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14501 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14502 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14503 }
14504 // Not a build vector of (possibly fp_rounded) loads.
14505 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14506 N->getNumOperands() == 1)
14507 return SDValue();
14508
14509 if (!IsRoundOfExtLoad)
14510 FirstLoad = cast<LoadSDNode>(FirstInput);
14511
14512 SmallVector<LoadSDNode *, 4> InputLoads;
14513 InputLoads.push_back(FirstLoad);
14514 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14515 // If any inputs are fp_round(extload), they all must be.
14516 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14517 return SDValue();
14518
14519 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14520 N->getOperand(i);
14521 if (NextInput.getOpcode() != ISD::LOAD)
14522 return SDValue();
14523
14524 SDValue PreviousInput =
14525 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14526 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14527 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14528
14529 // If any inputs are fp_round(extload), they all must be.
14530 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14531 return SDValue();
14532
14533 // We only care about regular loads. The PPC-specific load intrinsics
14534 // will not lead to a merge opportunity.
14535 if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
14536 InputsAreConsecutiveLoads = false;
14537 if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
14538 InputsAreReverseConsecutive = false;
14539
14540 // Exit early if the loads are neither consecutive nor reverse consecutive.
14541 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14542 return SDValue();
14543 InputLoads.push_back(LD2);
14544 }
14545
14547 "The loads cannot be both consecutive and reverse consecutive.");
14548
14549 SDValue WideLoad;
14550 SDValue ReturnSDVal;
14551 if (InputsAreConsecutiveLoads) {
14552 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14553 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14554 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14555 FirstLoad->getAlign());
14556 ReturnSDVal = WideLoad;
14557 } else if (InputsAreReverseConsecutive) {
14558 LoadSDNode *LastLoad = InputLoads.back();
14559 assert(LastLoad && "Input needs to be a LoadSDNode.");
14560 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14561 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14562 LastLoad->getAlign());
14563 SmallVector<int, 16> Ops;
14564 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14565 Ops.push_back(i);
14566
14567 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14568 DAG.getUNDEF(N->getValueType(0)), Ops);
14569 } else
14570 return SDValue();
14571
14572 for (auto *LD : InputLoads)
14573 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14574 return ReturnSDVal;
14575}
14576
14577 // This function adds the vector_shuffle needed to get
14578// the elements of the vector extract in the correct position
14579// as specified by the CorrectElems encoding.
14580 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14581 SDValue Input, uint64_t Elems,
14582 uint64_t CorrectElems) {
14583 SDLoc dl(N);
14584
14585 unsigned NumElems = Input.getValueType().getVectorNumElements();
14586 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14587
14588 // Knowing the element indices being extracted from the original
14589 // vector and the order in which they're being inserted, just put
14590 // them at the element indices required for the instruction.
14591 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14592 if (DAG.getDataLayout().isLittleEndian())
14593 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14594 else
14595 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14596 CorrectElems = CorrectElems >> 8;
14597 Elems = Elems >> 8;
14598 }
14599
14600 SDValue Shuffle =
14601 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14602 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14603
14604 EVT VT = N->getValueType(0);
14605 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14606
14608 Input.getValueType().getVectorElementType(),
14609 VT.getVectorNumElements());
14610 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14611 DAG.getValueType(ExtVT));
14612}
14613
14614// Look for build vector patterns where input operands come from sign
14615// extended vector_extract elements of specific indices. If the correct indices
14616// aren't used, add a vector shuffle to fix up the indices and create
14617 // a SIGN_EXTEND_INREG node, which selects the vector sign extend instructions
14618// during instruction selection.
14619 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14620 // This array encodes the indices that the vector sign extend instructions
14621 // extract from when extending from one type to another for both BE and LE.
14622 // The right nibble of each byte corresponds to the LE indices,
14623 // and the left nibble of each byte corresponds to the BE indices.
14624 // For example: 0x3074B8FC byte->word
14625 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14626 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14627 // For example: 0x000070F8 byte->double word
14628 // For LE: the allowed indices are: 0x0,0x8
14629 // For BE: the allowed indices are: 0x7,0xF
14630 uint64_t TargetElems[] = {
14631 0x3074B8FC, // b->w
14632 0x000070F8, // b->d
14633 0x10325476, // h->w
14634 0x00003074, // h->d
14635 0x00001032, // w->d
14636 };
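// For example, on LE a byte-to-word build vector whose operands extract
// elements 0, 4, 8 and 12 (in that order) accumulates Elems = 0x0004080C,
// which equals TargetElems[0] & 0x0F0F0F0F0F0F0F0F, so no fixup shuffle is
// needed; any other index set takes the addShuffleForVecExtend() path.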
14637
14638 uint64_t Elems = 0;
14639 int Index;
14640 SDValue Input;
14641
14642 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14643 if (!Op)
14644 return false;
14645 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14646 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14647 return false;
14648
14649 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14650 // of the right width.
14651 SDValue Extract = Op.getOperand(0);
14652 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14653 Extract = Extract.getOperand(0);
14654 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14655 return false;
14656
14657 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14658 if (!ExtOp)
14659 return false;
14660
14661 Index = ExtOp->getZExtValue();
14662 if (Input && Input != Extract.getOperand(0))
14663 return false;
14664
14665 if (!Input)
14666 Input = Extract.getOperand(0);
14667
14668 Elems = Elems << 8;
14669 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14670 Elems |= Index;
14671
14672 return true;
14673 };
14674
14675 // If the build vector operands aren't sign extended vector extracts
14676 // of the same input vector, then return.
14677 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14678 if (!isSExtOfVecExtract(N->getOperand(i))) {
14679 return SDValue();
14680 }
14681 }
14682
14683 // If the vector extract indices are not correct, add the appropriate
14684 // vector_shuffle.
14685 int TgtElemArrayIdx;
14686 int InputSize = Input.getValueType().getScalarSizeInBits();
14687 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14688 if (InputSize + OutputSize == 40)
14689 TgtElemArrayIdx = 0;
14690 else if (InputSize + OutputSize == 72)
14691 TgtElemArrayIdx = 1;
14692 else if (InputSize + OutputSize == 48)
14693 TgtElemArrayIdx = 2;
14694 else if (InputSize + OutputSize == 80)
14695 TgtElemArrayIdx = 3;
14696 else if (InputSize + OutputSize == 96)
14697 TgtElemArrayIdx = 4;
14698 else
14699 return SDValue();
14700
14701 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14702 CorrectElems = DAG.getDataLayout().isLittleEndian()
14703 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14704 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14705 if (Elems != CorrectElems) {
14706 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14707 }
14708
14709 // Regular lowering will catch cases where a shuffle is not needed.
14710 return SDValue();
14711}
14712
14713// Look for the pattern of a load from a narrow width to i128, feeding
14714// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14715// (LXVRZX). This node represents a zero extending load that will be matched
14716// to the Load VSX Vector Rightmost instructions.
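// For example, (v1i128 (build_vector (i128 (zextload i64 from P)))) becomes
// an LXVRZX node whose third operand (here 64) records the load width in
// bits.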
14717 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14718 SDLoc DL(N);
14719
14720 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14721 if (N->getValueType(0) != MVT::v1i128)
14722 return SDValue();
14723
14724 SDValue Operand = N->getOperand(0);
14725 // Proceed with the transformation if the operand to the BUILD_VECTOR
14726 // is a load instruction.
14727 if (Operand.getOpcode() != ISD::LOAD)
14728 return SDValue();
14729
14730 auto *LD = cast<LoadSDNode>(Operand);
14731 EVT MemoryType = LD->getMemoryVT();
14732
14733 // This transformation is only valid if we are loading either a byte,
14734 // halfword, word, or doubleword.
14735 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14736 MemoryType == MVT::i32 || MemoryType == MVT::i64;
14737
14738 // Ensure that the load from the narrow width is being zero extended to i128.
14739 if (!ValidLDType ||
14740 (LD->getExtensionType() != ISD::ZEXTLOAD &&
14741 LD->getExtensionType() != ISD::EXTLOAD))
14742 return SDValue();
14743
14744 SDValue LoadOps[] = {
14745 LD->getChain(), LD->getBasePtr(),
14746 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14747
14748 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14749 DAG.getVTList(MVT::v1i128, MVT::Other),
14750 LoadOps, MemoryType, LD->getMemOperand());
14751}
14752
14753SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14754 DAGCombinerInfo &DCI) const {
14755 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14756 "Should be called with a BUILD_VECTOR node");
14757
14758 SelectionDAG &DAG = DCI.DAG;
14759 SDLoc dl(N);
14760
14761 if (!Subtarget.hasVSX())
14762 return SDValue();
14763
14764 // The target independent DAG combiner will leave a build_vector of
14765 // float-to-int conversions intact. We can generate MUCH better code for
14766 // a float-to-int conversion of a vector of floats.
14767 SDValue FirstInput = N->getOperand(0);
14768 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14769 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14770 if (Reduced)
14771 return Reduced;
14772 }
14773
14774 // If we're building a vector out of consecutive loads, just load that
14775 // vector type.
14776 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14777 if (Reduced)
14778 return Reduced;
14779
14780 // If we're building a vector out of extended elements from another vector
14781 // we have P9 vector integer extend instructions. The code assumes legal
14782 // input types (i.e. it can't handle things like v4i16) so do not run before
14783 // legalization.
14784 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14785 Reduced = combineBVOfVecSExt(N, DAG);
14786 if (Reduced)
14787 return Reduced;
14788 }
14789
14790 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14791 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14792 // is a load from <valid narrow width> to i128.
14793 if (Subtarget.isISA3_1()) {
14794 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14795 if (BVOfZLoad)
14796 return BVOfZLoad;
14797 }
14798
14799 if (N->getValueType(0) != MVT::v2f64)
14800 return SDValue();
14801
14802 // Looking for:
14803 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14804 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14805 FirstInput.getOpcode() != ISD::UINT_TO_FP)
14806 return SDValue();
14807 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14808 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14809 return SDValue();
14810 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14811 return SDValue();
14812
14813 SDValue Ext1 = FirstInput.getOperand(0);
14814 SDValue Ext2 = N->getOperand(1).getOperand(0);
14815 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14816 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14817 return SDValue();
14818
14819 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14820 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14821 if (!Ext1Op || !Ext2Op)
14822 return SDValue();
14823 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14824 Ext1.getOperand(0) != Ext2.getOperand(0))
14825 return SDValue();
14826
14827 int FirstElem = Ext1Op->getZExtValue();
14828 int SecondElem = Ext2Op->getZExtValue();
14829 int SubvecIdx;
14830 if (FirstElem == 0 && SecondElem == 1)
14831 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14832 else if (FirstElem == 2 && SecondElem == 3)
14833 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14834 else
14835 return SDValue();
14836
14837 SDValue SrcVec = Ext1.getOperand(0);
14838 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14839 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14840 return DAG.getNode(NodeType, dl, MVT::v2f64,
14841 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14842}
14843
14844SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14845 DAGCombinerInfo &DCI) const {
14846 assert((N->getOpcode() == ISD::SINT_TO_FP ||
14847 N->getOpcode() == ISD::UINT_TO_FP) &&
14848 "Need an int -> FP conversion node here");
14849
14850 if (useSoftFloat() || !Subtarget.has64BitSupport())
14851 return SDValue();
14852
14853 SelectionDAG &DAG = DCI.DAG;
14854 SDLoc dl(N);
14855 SDValue Op(N, 0);
14856
14857 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14858 // from the hardware.
14859 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14860 return SDValue();
14861 if (!Op.getOperand(0).getValueType().isSimple())
14862 return SDValue();
14863 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14864 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14865 return SDValue();
14866
14867 SDValue FirstOperand(Op.getOperand(0));
14868 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14869 (FirstOperand.getValueType() == MVT::i8 ||
14870 FirstOperand.getValueType() == MVT::i16);
14871 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14872 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14873 bool DstDouble = Op.getValueType() == MVT::f64;
14874 unsigned ConvOp = Signed ?
14875 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14876 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14877 SDValue WidthConst =
14878 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14879 dl, false);
14880 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14881 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14882 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14883 DAG.getVTList(MVT::f64, MVT::Other),
14884 Ops, MVT::i8, LDN->getMemOperand());
14885
14886 // For signed conversion, we need to sign-extend the value in the VSR
14887 if (Signed) {
14888 SDValue ExtOps[] = { Ld, WidthConst };
14889 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14890 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14891 } else
14892 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14893 }
14894
14895
14896 // For i32 intermediate values, unfortunately, the conversion functions
14897 // leave the upper 32 bits of the value undefined. Within the set of
14898 // scalar instructions, we have no method for zero- or sign-extending the
14899 // value. Thus, we cannot handle i32 intermediate values here.
14900 if (Op.getOperand(0).getValueType() == MVT::i32)
14901 return SDValue();
14902
14903 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14904 "UINT_TO_FP is supported only with FPCVT");
14905
14906 // If we have FCFIDS, then use it when converting to single-precision.
14907 // Otherwise, convert to double-precision and then round.
14908 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14909 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14910 : PPCISD::FCFIDS)
14911 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14912 : PPCISD::FCFID);
14913 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14914 ? MVT::f32
14915 : MVT::f64;
14916
14917 // If we're converting from a float, to an int, and back to a float again,
14918 // then we don't need the store/load pair at all.
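// For example (sketch): (f64 (sint_to_fp (i64 (fp_to_sint f64:x)))) can be
// emitted as fctidz followed by fcfid, keeping the value in a floating-point
// register for the entire round trip.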
14919 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14920 Subtarget.hasFPCVT()) ||
14921 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14922 SDValue Src = Op.getOperand(0).getOperand(0);
14923 if (Src.getValueType() == MVT::f32) {
14924 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14925 DCI.AddToWorklist(Src.getNode());
14926 } else if (Src.getValueType() != MVT::f64) {
14927 // Make sure that we don't pick up a ppc_fp128 source value.
14928 return SDValue();
14929 }
14930
14931 unsigned FCTOp =
14932 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14933 PPCISD::FCTIDUZ;
14934
14935 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14936 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14937
14938 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14939 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
14940 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
14941 DCI.AddToWorklist(FP.getNode());
14942 }
14943
14944 return FP;
14945 }
14946
14947 return SDValue();
14948}
14949
14950// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14951// builtins) into loads with swaps.
14952 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14953 DAGCombinerInfo &DCI) const {
14954 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
14955 // load combines.
14956 if (DCI.isBeforeLegalizeOps())
14957 return SDValue();
14958
14959 SelectionDAG &DAG = DCI.DAG;
14960 SDLoc dl(N);
14961 SDValue Chain;
14962 SDValue Base;
14963 MachineMemOperand *MMO;
14964
14965 switch (N->getOpcode()) {
14966 default:
14967 llvm_unreachable("Unexpected opcode for little endian VSX load");
14968 case ISD::LOAD: {
14969 LoadSDNode *LD = cast<LoadSDNode>(N);
14970 Chain = LD->getChain();
14971 Base = LD->getBasePtr();
14972 MMO = LD->getMemOperand();
14973 // If the MMO suggests this isn't a load of a full vector, leave
14974 // things alone. For a built-in, we have to make the change for
14975 // correctness, so if there is a size problem that will be a bug.
14976 if (MMO->getSize() < 16)
14977 return SDValue();
14978 break;
14979 }
14980 case ISD::INTRINSIC_W_CHAIN: {
14981 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14982 Chain = Intrin->getChain();
14983 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14984 // us what we want. Get operand 2 instead.
14985 Base = Intrin->getOperand(2);
14986 MMO = Intrin->getMemOperand();
14987 break;
14988 }
14989 }
14990
14991 MVT VecTy = N->getValueType(0).getSimpleVT();
14992
14993 SDValue LoadOps[] = { Chain, Base };
14994 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14995 DAG.getVTList(MVT::v2f64, MVT::Other),
14996 LoadOps, MVT::v2f64, MMO);
14997
14998 DCI.AddToWorklist(Load.getNode());
14999 Chain = Load.getValue(1);
15000 SDValue Swap = DAG.getNode(
15001 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15002 DCI.AddToWorklist(Swap.getNode());
15003
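// Schematically, the nodes built above form
//   (v2f64 (XXSWAPD (LXVD2X ptr)))
// i.e. a permuting little-endian load followed by a swap that restores the
// expected element order.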
15004 // Add a bitcast if the resulting load type doesn't match v2f64.
15005 if (VecTy != MVT::v2f64) {
15006 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15007 DCI.AddToWorklist(N.getNode());
15008 // Package {bitcast value, swap's chain} to match Load's shape.
15009 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15010 N, Swap.getValue(1));
15011 }
15012
15013 return Swap;
15014}
15015
15016// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15017// builtins) into stores with swaps.
15018 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15019 DAGCombinerInfo &DCI) const {
15020 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15021 // store combines.
15022 if (DCI.isBeforeLegalizeOps())
15023 return SDValue();
15024
15025 SelectionDAG &DAG = DCI.DAG;
15026 SDLoc dl(N);
15027 SDValue Chain;
15028 SDValue Base;
15029 unsigned SrcOpnd;
15030 MachineMemOperand *MMO;
15031
15032 switch (N->getOpcode()) {
15033 default:
15034 llvm_unreachable("Unexpected opcode for little endian VSX store");
15035 case ISD::STORE: {
15036 StoreSDNode *ST = cast<StoreSDNode>(N);
15037 Chain = ST->getChain();
15038 Base = ST->getBasePtr();
15039 MMO = ST->getMemOperand();
15040 SrcOpnd = 1;
15041 // If the MMO suggests this isn't a store of a full vector, leave
15042 // things alone. For a built-in, we have to make the change for
15043 // correctness, so if there is a size problem that will be a bug.
15044 if (MMO->getSize() < 16)
15045 return SDValue();
15046 break;
15047 }
15048 case ISD::INTRINSIC_VOID: {
15049 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15050 Chain = Intrin->getChain();
15051 // Intrin->getBasePtr() oddly does not get what we want.
15052 Base = Intrin->getOperand(3);
15053 MMO = Intrin->getMemOperand();
15054 SrcOpnd = 2;
15055 break;
15056 }
15057 }
15058
15059 SDValue Src = N->getOperand(SrcOpnd);
15060 MVT VecTy = Src.getValueType().getSimpleVT();
15061
15062 // All stores are done as v2f64, with a bitcast beforehand if needed.
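// For example (sketch): a v4i32 store ends up roughly as
//   (STXVD2X (XXSWAPD (v2f64 (bitcast v4i32:val))), ptr)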
15063 if (VecTy != MVT::v2f64) {
15064 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15065 DCI.AddToWorklist(Src.getNode());
15066 }
15067
15068 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15069 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15070 DCI.AddToWorklist(Swap.getNode());
15071 Chain = Swap.getValue(1);
15072 SDValue StoreOps[] = { Chain, Swap, Base };
15073 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15074 DAG.getVTList(MVT::Other),
15075 StoreOps, VecTy, MMO);
15076 DCI.AddToWorklist(Store.getNode());
15077 return Store;
15078}
15079
15080// Handle DAG combine for STORE (FP_TO_INT F).
15081SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15082 DAGCombinerInfo &DCI) const {
15083 SelectionDAG &DAG = DCI.DAG;
15084 SDLoc dl(N);
15085 unsigned Opcode = N->getOperand(1).getOpcode();
15086 (void)Opcode;
15087 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15088
15089 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15090 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15091 && "Not a FP_TO_INT Instruction!");
15092
15093 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15094 EVT Op1VT = N->getOperand(1).getValueType();
15095 EVT ResVT = Val.getValueType();
15096
15097 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15098 return SDValue();
15099
15100 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15101 bool ValidTypeForStoreFltAsInt =
15102 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15103 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15104
15105 // TODO: Lower conversion from f128 on all VSX targets
15106 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15107 return SDValue();
15108
15109 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15110 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15111 return SDValue();
15112
15113 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15114
15115 // Set number of bytes being converted.
15116 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15117 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15118 DAG.getIntPtrConstant(ByteSize, dl, false),
15119 DAG.getValueType(Op1VT)};
15120
15121 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15122 DAG.getVTList(MVT::Other), Ops,
15123 cast<StoreSDNode>(N)->getMemoryVT(),
15124 cast<StoreSDNode>(N)->getMemOperand());
15125
15126 return Val;
15127}
15128
15129static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15130 // Check that the source of the element keeps flipping
15131 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
15132 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15133 for (int i = 1, e = Mask.size(); i < e; i++) {
15134 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15135 return false;
15136 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15137 return false;
15138 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15139 }
15140 return true;
15141}
15142
15143static bool isSplatBV(SDValue Op) {
15144 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15145 return false;
15146 SDValue FirstOp;
15147
15148 // Find first non-undef input.
15149 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15150 FirstOp = Op.getOperand(i);
15151 if (!FirstOp.isUndef())
15152 break;
15153 }
15154
15155 // All inputs are undef or the same as the first non-undef input.
15156 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15157 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15158 return false;
15159 return true;
15160}
15161
15162 static SDValue isScalarToVec(SDValue Op) {
15163 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15164 return Op;
15165 if (Op.getOpcode() != ISD::BITCAST)
15166 return SDValue();
15167 Op = Op.getOperand(0);
15168 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15169 return Op;
15170 return SDValue();
15171}
15172
15173// Fix up the shuffle mask to account for the fact that the result of
15174// scalar_to_vector is not in lane zero. This just takes all values in
15175// the ranges specified by the min/max indices and adds the number of
15176// elements required to ensure each element comes from the respective
15177// position in the valid lane.
15178// On little endian, that's just the corresponding element in the other
15179// half of the vector. On big endian, it is in the same half but right
15180// justified rather than left justified in that half.
15181 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15182 int LHSMaxIdx, int RHSMinIdx,
15183 int RHSMaxIdx, int HalfVec,
15184 unsigned ValidLaneWidth,
15185 const PPCSubtarget &Subtarget) {
15186 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15187 int Idx = ShuffV[i];
15188 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15189 ShuffV[i] +=
15190 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15191 }
15192}
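// A rough worked example: for a v4i32 shuffle on little endian (HalfVec == 2,
// ValidLaneWidth == 1), a mask entry of 0 that refers to a permuted
// scalar_to_vector input is rewritten to 2, since the scalar actually sits in
// the other half of the vector.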
15193
15194// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15195// the original is:
15196// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15197// In such a case, just change the shuffle mask to extract the element
15198// from the permuted index.
15199 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15200 const PPCSubtarget &Subtarget) {
15201 SDLoc dl(OrigSToV);
15202 EVT VT = OrigSToV.getValueType();
15203 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15204 "Expecting a SCALAR_TO_VECTOR here");
15205 SDValue Input = OrigSToV.getOperand(0);
15206
15207 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15208 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15209 SDValue OrigVector = Input.getOperand(0);
15210
15211 // Can't handle non-const element indices or different vector types
15212 // for the input to the extract and the output of the scalar_to_vector.
15213 if (Idx && VT == OrigVector.getValueType()) {
15214 unsigned NumElts = VT.getVectorNumElements();
15215 assert(
15216 NumElts > 1 &&
15217 "Cannot produce a permuted scalar_to_vector for one element vector");
15218 SmallVector<int, 16> NewMask(NumElts, -1);
15219 unsigned ResultInElt = NumElts / 2;
15220 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15221 NewMask[ResultInElt] = Idx->getZExtValue();
15222 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15223 }
15224 }
15225 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15226 OrigSToV.getOperand(0));
15227}
15228
15229// On little endian subtargets, combine shuffles such as:
15230// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15231// into:
15232// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15233// because the latter can be matched to a single instruction merge.
15234// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15235// to put the value into element zero. Adjust the shuffle mask so that the
15236// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15237// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15238// nodes with elements smaller than doubleword because all the ways
15239// of getting scalar data into a vector register put the value in the
15240// rightmost element of the left half of the vector.
15241SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15242 SelectionDAG &DAG) const {
15243 SDValue LHS = SVN->getOperand(0);
15244 SDValue RHS = SVN->getOperand(1);
15245 auto Mask = SVN->getMask();
15246 int NumElts = LHS.getValueType().getVectorNumElements();
15247 SDValue Res(SVN, 0);
15248 SDLoc dl(SVN);
15249 bool IsLittleEndian = Subtarget.isLittleEndian();
15250
15251 // On big endian targets this is only useful for subtargets with direct moves.
15252 // On little endian targets it would be useful for all subtargets with VSX.
15253 // However adding special handling for LE subtargets without direct moves
15254 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15255 // which includes direct moves.
15256 if (!Subtarget.hasDirectMove())
15257 return Res;
15258
15259 // If this is not a shuffle of a shuffle and the first element comes from
15260 // the second vector, canonicalize to the commuted form. This will make it
15261 // more likely to match one of the single instruction patterns.
15262 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15263 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15264 std::swap(LHS, RHS);
15265 Res = DAG.getCommutedVectorShuffle(*SVN);
15266 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15267 }
15268
15269 // Adjust the shuffle mask if either input vector comes from a
15270 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15271 // form (to prevent the need for a swap).
15272 SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
15273 SDValue SToVLHS = isScalarToVec(LHS);
15274 SDValue SToVRHS = isScalarToVec(RHS);
15275 if (SToVLHS || SToVRHS) {
15276 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15277 // same type and have differing element sizes, then do not perform
15278 // the following transformation. The current transformation for
15279 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15280 // element size. This will be updated in the future to account for
15281 // differing sizes of the LHS and RHS.
15282 if (SToVLHS && SToVRHS &&
15283 (SToVLHS.getValueType().getScalarSizeInBits() !=
15284 SToVRHS.getValueType().getScalarSizeInBits()))
15285 return Res;
15286
15287 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15288 : SToVRHS.getValueType().getVectorNumElements();
15289 int NumEltsOut = ShuffV.size();
15290 // The width of the "valid lane" (i.e. the lane that contains the value that
15291 // is vectorized) needs to be expressed in terms of the number of elements
15292 // of the shuffle. It is thereby the ratio of the values before and after
15293 // any bitcast.
15294 unsigned ValidLaneWidth =
15295 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15296 LHS.getValueType().getScalarSizeInBits()
15297 : SToVRHS.getValueType().getScalarSizeInBits() /
15298 RHS.getValueType().getScalarSizeInBits();
15299
15300 // Initially assume that neither input is permuted. These will be adjusted
15301 // accordingly if either input is.
15302 int LHSMaxIdx = -1;
15303 int RHSMinIdx = -1;
15304 int RHSMaxIdx = -1;
15305 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15306
15307 // Get the permuted scalar to vector nodes for the source(s) that come from
15308 // ISD::SCALAR_TO_VECTOR.
15309 // On big endian systems, this only makes sense for element sizes smaller
15310 // than 64 bits since for 64-bit elements, all instructions already put
15311 // the value into element zero. Since scalar size of LHS and RHS may differ
15312 // after isScalarToVec, this should be checked using their own sizes.
15313 if (SToVLHS) {
15314 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15315 return Res;
15316 // Set up the values for the shuffle vector fixup.
15317 LHSMaxIdx = NumEltsOut / NumEltsIn;
15318 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15319 if (SToVLHS.getValueType() != LHS.getValueType())
15320 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15321 LHS = SToVLHS;
15322 }
15323 if (SToVRHS) {
15324 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15325 return Res;
15326 RHSMinIdx = NumEltsOut;
15327 RHSMaxIdx = NumEltsOut + NumEltsOut / NumEltsIn;
15328 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15329 if (SToVRHS.getValueType() != RHS.getValueType())
15330 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15331 RHS = SToVRHS;
15332 }
15333
15334 // Fix up the shuffle mask to reflect where the desired element actually is.
15335 // The minimum and maximum indices that correspond to element zero for both
15336 // the LHS and RHS are computed and will control which shuffle mask entries
15337 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15338 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15339 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15340 HalfVec, ValidLaneWidth, Subtarget);
15341 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15342
15343 // We may have simplified away the shuffle. We won't be able to do anything
15344 // further with it here.
15345 if (!isa<ShuffleVectorSDNode>(Res))
15346 return Res;
15347 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15348 }
15349
15350 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15351 // The common case after we commuted the shuffle is that the RHS is a splat
15352 // and we have elements coming in from the splat at indices that are not
15353 // conducive to using a merge.
15354 // Example:
15355 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15356 if (!isSplatBV(TheSplat))
15357 return Res;
15358
15359 // We are looking for a mask such that all even elements are from
15360 // one vector and all odd elements from the other.
15361 if (!isAlternatingShuffMask(Mask, NumElts))
15362 return Res;
15363
15364 // Adjust the mask so we are pulling in the same index from the splat
15365 // as the index from the interesting vector in consecutive elements.
15366 if (IsLittleEndian) {
15367 // Example (even elements from first vector):
15368 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15369 if (Mask[0] < NumElts)
15370 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15371 if (ShuffV[i] < 0)
15372 continue;
15373 ShuffV[i] = (ShuffV[i - 1] + NumElts);
15374 }
15375 // Example (odd elements from first vector):
15376 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15377 else
15378 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15379 if (ShuffV[i] < 0)
15380 continue;
15381 ShuffV[i] = (ShuffV[i + 1] + NumElts);
15382 }
15383 } else {
15384 // Example (even elements from first vector):
15385 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15386 if (Mask[0] < NumElts)
15387 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15388 if (ShuffV[i] < 0)
15389 continue;
15390 ShuffV[i] = ShuffV[i + 1] - NumElts;
15391 }
15392 // Example (odd elements from first vector):
15393 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15394 else
15395 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15396 if (ShuffV[i] < 0)
15397 continue;
15398 ShuffV[i] = ShuffV[i - 1] - NumElts;
15399 }
15400 }
15401
15402 // If the RHS has undefs, we need to remove them since we may have created
15403 // a shuffle that adds those instead of the splat value.
15404 SDValue SplatVal =
15405 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15406 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15407
15408 if (IsLittleEndian)
15409 RHS = TheSplat;
15410 else
15411 LHS = TheSplat;
15412 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15413}
15414
15415SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15416 LSBaseSDNode *LSBase,
15417 DAGCombinerInfo &DCI) const {
15418 assert((SVN->getOpcode() == ISD::VECTOR_SHUFFLE) &&
15419 "Not a reverse memop pattern!");
15420
15421 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15422 auto Mask = SVN->getMask();
15423 int i = 0;
15424 auto I = Mask.rbegin();
15425 auto E = Mask.rend();
15426
15427 for (; I != E; ++I) {
15428 if (*I != i)
15429 return false;
15430 i++;
15431 }
15432 return true;
15433 };
15434
15435 SelectionDAG &DAG = DCI.DAG;
15436 EVT VT = SVN->getValueType(0);
15437
15438 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15439 return SDValue();
15440
15441 // Before P9, we have the PPCVSXSwapRemoval pass to fix up the element order.
15442 // See the comment in PPCVSXSwapRemoval.cpp.
15443 // This combine conflicts with that optimization, so we don't do it here.
15444 if (!Subtarget.hasP9Vector())
15445 return SDValue();
15446
15447 if (!IsElementReverse(SVN))
15448 return SDValue();
15449
15450 if (LSBase->getOpcode() == ISD::LOAD) {
15451 // If the load return value 0 has more than one user except the
15452 // shufflevector instruction, it is not profitable to replace the
15453 // shufflevector with a reverse load.
15454 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15455 UI != UE; ++UI)
15456 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15457 return SDValue();
15458
15459 SDLoc dl(LSBase);
15460 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15461 return DAG.getMemIntrinsicNode(
15462 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15463 LSBase->getMemoryVT(), LSBase->getMemOperand());
15464 }
15465
15466 if (LSBase->getOpcode() == ISD::STORE) {
15467 // If there are other uses of the shuffle, the swap cannot be avoided.
15468 // Forcing the use of an X-Form (since swapped stores only have
15469 // X-Forms) without removing the swap is unprofitable.
15470 if (!SVN->hasOneUse())
15471 return SDValue();
15472
15473 SDLoc dl(LSBase);
15474 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15475 LSBase->getBasePtr()};
15476 return DAG.getMemIntrinsicNode(
15477 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15478 LSBase->getMemoryVT(), LSBase->getMemOperand());
15479 }
15480
15481 llvm_unreachable("Expected a load or store node here");
15482}
15483
15484 static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15485 unsigned IntrinsicID =
15486 cast<ConstantSDNode>(Intrin.getOperand(1))->getZExtValue();
15487 if (IntrinsicID == Intrinsic::ppc_stdcx)
15488 StoreWidth = 8;
15489 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15490 StoreWidth = 4;
15491 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15492 StoreWidth = 2;
15493 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15494 StoreWidth = 1;
15495 else
15496 return false;
15497 return true;
15498}
15499
15500 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15501 DAGCombinerInfo &DCI) const {
15502 SelectionDAG &DAG = DCI.DAG;
15503 SDLoc dl(N);
15504 switch (N->getOpcode()) {
15505 default: break;
15506 case ISD::ADD:
15507 return combineADD(N, DCI);
15508 case ISD::AND: {
15509 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15510 // original input as that will prevent us from selecting optimal rotates.
15511 // This only matters if the input to the extend is i32 widened to i64.
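// For example (sketch, with i32 x):
//   (and (zext i64 (srl x, 4)), 0xFF)
// selects better when narrowed to
//   (zext i64 (and i32 (srl x, 4), 0xFF))
// because the 32-bit shift-and-mask pair can become a single rotate-and-mask.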
15512 SDValue Op1 = N->getOperand(0);
15513 SDValue Op2 = N->getOperand(1);
15514 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15515 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15516 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15517 Op1.getOperand(0).getValueType() != MVT::i32)
15518 break;
15519 SDValue NarrowOp = Op1.getOperand(0);
15520 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15521 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15522 break;
15523
15524 uint64_t Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
15525 // Make sure that the constant is narrow enough to fit in the narrow type.
15526 if (!isUInt<32>(Imm))
15527 break;
15528 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15529 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15530 return DAG.getAnyExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15531 }
15532 case ISD::SHL:
15533 return combineSHL(N, DCI);
15534 case ISD::SRA:
15535 return combineSRA(N, DCI);
15536 case ISD::SRL:
15537 return combineSRL(N, DCI);
15538 case ISD::MUL:
15539 return combineMUL(N, DCI);
15540 case ISD::FMA:
15541 case PPCISD::FNMSUB:
15542 return combineFMALike(N, DCI);
15543 case PPCISD::SHL:
15544 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15545 return N->getOperand(0);
15546 break;
15547 case PPCISD::SRL:
15548 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15549 return N->getOperand(0);
15550 break;
15551 case PPCISD::SRA:
15552 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15553 if (C->isZero() || // 0 >>s V -> 0.
15554 C->isAllOnes()) // -1 >>s V -> -1.
15555 return N->getOperand(0);
15556 }
15557 break;
15558 case ISD::SIGN_EXTEND:
15559 case ISD::ZERO_EXTEND:
15560 case ISD::ANY_EXTEND:
15561 return DAGCombineExtBoolTrunc(N, DCI);
15562 case ISD::TRUNCATE:
15563 return combineTRUNCATE(N, DCI);
15564 case ISD::SETCC:
15565 if (SDValue CSCC = combineSetCC(N, DCI))
15566 return CSCC;
15567 [[fallthrough]];
15568 case ISD::SELECT_CC:
15569 return DAGCombineTruncBoolExt(N, DCI);
15570 case ISD::SINT_TO_FP:
15571 case ISD::UINT_TO_FP:
15572 return combineFPToIntToFP(N, DCI);
15573 case ISD::VECTOR_SHUFFLE:
15574 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15575 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15576 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15577 }
15578 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15579 case ISD::STORE: {
15580
15581 EVT Op1VT = N->getOperand(1).getValueType();
15582 unsigned Opcode = N->getOperand(1).getOpcode();
15583
15584 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15585 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15586 SDValue Val = combineStoreFPToInt(N, DCI);
15587 if (Val)
15588 return Val;
15589 }
15590
15591 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15592 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15593 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15594 if (Val)
15595 return Val;
15596 }
15597
15598 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
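// For example (sketch): (store (bswap i32:x), ptr) becomes a single
// byte-reversed store, roughly (STBRX x, ptr, i32).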
15599 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15600 N->getOperand(1).getNode()->hasOneUse() &&
15601 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15602 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15603
15604 // STBRX can only handle simple types and it makes no sense to store less
15605 // than two bytes in byte-reversed order.
15606 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15607 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15608 break;
15609
15610 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15611 // Do an any-extend to 32-bits if this is a half-word input.
15612 if (BSwapOp.getValueType() == MVT::i16)
15613 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15614
15615 // If the type of the BSWAP operand is wider than the stored memory width,
15616 // it needs to be shifted to the right side before STBRX.
15617 if (Op1VT.bitsGT(mVT)) {
15618 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15619 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15620 DAG.getConstant(Shift, dl, MVT::i32));
15621 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15622 if (Op1VT == MVT::i64)
15623 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15624 }
15625
15626 SDValue Ops[] = {
15627 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15628 };
15629 return
15630 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15631 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15632 cast<StoreSDNode>(N)->getMemOperand());
15633 }
15634
15635 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15636 // So it can increase the chance of CSE constant construction.
15637 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15638 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15639 // Need to sign-extend to 64 bits to handle negative values.
15640 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15641 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15642 MemVT.getSizeInBits());
15643 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15644
15645 // DAG.getTruncStore() can't be used here because it doesn't accept
15646 // the general (base + offset) addressing mode.
15647 // So we use UpdateNodeOperands and setTruncatingStore instead.
15648 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15649 N->getOperand(3));
15650 cast<StoreSDNode>(N)->setTruncatingStore(true);
15651 return SDValue(N, 0);
15652 }
15653
15654 // For little endian, VSX stores require generating xxswapd/lxvd2x.
15655 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15656 if (Op1VT.isSimple()) {
15657 MVT StoreVT = Op1VT.getSimpleVT();
15658 if (Subtarget.needsSwapsForVSXMemOps() &&
15659 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15660 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15661 return expandVSXStoreForLE(N, DCI);
15662 }
15663 break;
15664 }
15665 case ISD::LOAD: {
15666 LoadSDNode *LD = cast<LoadSDNode>(N);
15667 EVT VT = LD->getValueType(0);
15668
15669 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15670 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15671 if (VT.isSimple()) {
15672 MVT LoadVT = VT.getSimpleVT();
15673 if (Subtarget.needsSwapsForVSXMemOps() &&
15674 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15675 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15676 return expandVSXLoadForLE(N, DCI);
15677 }
15678
15679 // We sometimes end up with a 64-bit integer load, from which we extract
15680 // two single-precision floating-point numbers. This happens with
15681 // std::complex<float>, and other similar structures, because of the way we
15682 // canonicalize structure copies. However, if we lack direct moves,
15683 // then the final bitcasts from the extracted integer values to the
15684 // floating-point numbers turn into store/load pairs. Even with direct moves,
15685 // just loading the two floating-point numbers is likely better.
15686 auto ReplaceTwoFloatLoad = [&]() {
15687 if (VT != MVT::i64)
15688 return false;
15689
15690 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15691 LD->isVolatile())
15692 return false;
15693
15694 // We're looking for a sequence like this:
15695 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15696 // t16: i64 = srl t13, Constant:i32<32>
15697 // t17: i32 = truncate t16
15698 // t18: f32 = bitcast t17
15699 // t19: i32 = truncate t13
15700 // t20: f32 = bitcast t19
15701
15702 if (!LD->hasNUsesOfValue(2, 0))
15703 return false;
15704
15705 auto UI = LD->use_begin();
15706 while (UI.getUse().getResNo() != 0) ++UI;
15707 SDNode *Trunc = *UI++;
15708 while (UI.getUse().getResNo() != 0) ++UI;
15709 SDNode *RightShift = *UI;
15710 if (Trunc->getOpcode() != ISD::TRUNCATE)
15711 std::swap(Trunc, RightShift);
15712
15713 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15714 Trunc->getValueType(0) != MVT::i32 ||
15715 !Trunc->hasOneUse())
15716 return false;
15717 if (RightShift->getOpcode() != ISD::SRL ||
15718 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15719 RightShift->getConstantOperandVal(1) != 32 ||
15720 !RightShift->hasOneUse())
15721 return false;
15722
15723 SDNode *Trunc2 = *RightShift->use_begin();
15724 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15725 Trunc2->getValueType(0) != MVT::i32 ||
15726 !Trunc2->hasOneUse())
15727 return false;
15728
15729 SDNode *Bitcast = *Trunc->use_begin();
15730 SDNode *Bitcast2 = *Trunc2->use_begin();
15731
15732 if (Bitcast->getOpcode() != ISD::BITCAST ||
15733 Bitcast->getValueType(0) != MVT::f32)
15734 return false;
15735 if (Bitcast2->getOpcode() != ISD::BITCAST ||
15736 Bitcast2->getValueType(0) != MVT::f32)
15737 return false;
15738
15739 if (Subtarget.isLittleEndian())
15740 std::swap(Bitcast, Bitcast2);
15741
15742 // Bitcast has the second float (in memory-layout order) and Bitcast2
15743 // has the first one.
15744
15745 SDValue BasePtr = LD->getBasePtr();
15746 if (LD->isIndexed()) {
15747 assert(LD->getAddressingMode() == ISD::PRE_INC &&
15748 "Non-pre-inc AM on PPC?");
15749 BasePtr =
15750 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15751 LD->getOffset());
15752 }
15753
15754 auto MMOFlags =
15755 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15756 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15757 LD->getPointerInfo(), LD->getAlign(),
15758 MMOFlags, LD->getAAInfo());
15759 SDValue AddPtr =
15760 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15761 BasePtr, DAG.getIntPtrConstant(4, dl));
15762 SDValue FloatLoad2 = DAG.getLoad(
15763 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15764 LD->getPointerInfo().getWithOffset(4),
15765 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15766
15767 if (LD->isIndexed()) {
15768 // Note that DAGCombine should re-form any pre-increment load(s) from
15769 // what is produced here if that makes sense.
15770 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15771 }
15772
15773 DCI.CombineTo(Bitcast2, FloatLoad);
15774 DCI.CombineTo(Bitcast, FloatLoad2);
15775
15776 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15777 SDValue(FloatLoad2.getNode(), 1));
15778 return true;
15779 };
15780
15781 if (ReplaceTwoFloatLoad())
15782 return SDValue(N, 0);
15783
15784 EVT MemVT = LD->getMemoryVT();
15785 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15786 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15787 if (LD->isUnindexed() && VT.isVector() &&
15788 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15789 // P8 and later hardware should just use LOAD.
15790 !Subtarget.hasP8Vector() &&
15791 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15792 VT == MVT::v4f32))) &&
15793 LD->getAlign() < ABIAlignment) {
15794 // This is a type-legal unaligned Altivec load.
15795 SDValue Chain = LD->getChain();
15796 SDValue Ptr = LD->getBasePtr();
15797 bool isLittleEndian = Subtarget.isLittleEndian();
15798
15799 // This implements the loading of unaligned vectors as described in
15800 // the venerable Apple Velocity Engine overview. Specifically:
15801 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15802 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15803 //
15804 // The general idea is to expand a sequence of one or more unaligned
15805 // loads into an alignment-based permutation-control instruction (lvsl
15806 // or lvsr), a series of regular vector loads (which always truncate
15807 // their input address to an aligned address), and a series of
15808 // permutations. The results of these permutations are the requested
15809 // loaded values. The trick is that the last "extra" load is not taken
15810 // from the address you might suspect (sizeof(vector) bytes after the
15811 // last requested load), but rather sizeof(vector) - 1 bytes after the
15812 // last requested vector. The point of this is to avoid a page fault if
15813 // the base address happened to be aligned. This works because if the
15814 // base address is aligned, then adding less than a full vector length
15815 // will cause the last vector in the sequence to be (re)loaded.
15816 // Otherwise, the next vector will be fetched as you might suspect was
15817 // necessary.
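// Schematically (illustrative only), for big endian:
//   permcntl = lvsl(ptr)
//   base     = lvx(ptr)       // lvx truncates to an aligned address
//   extra    = lvx(ptr + 15)  // sizeof(vector) - 1, to avoid a page fault
//   result   = vperm(base, extra, permcntl)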
15818
15819 // We might be able to reuse the permutation generation from
15820 // a different base address offset from this one by an aligned amount.
15821 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15822 // optimization later.
15823 Intrinsic::ID Intr, IntrLD, IntrPerm;
15824 MVT PermCntlTy, PermTy, LDTy;
15825 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15826 : Intrinsic::ppc_altivec_lvsl;
15827 IntrLD = Intrinsic::ppc_altivec_lvx;
15828 IntrPerm = Intrinsic::ppc_altivec_vperm;
15829 PermCntlTy = MVT::v16i8;
15830 PermTy = MVT::v4i32;
15831 LDTy = MVT::v4i32;
15832
15833 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15834
15835 // Create the new MMO for the new base load. It is like the original MMO,
15836 // but represents an area in memory almost twice the vector size centered
15837 // on the original address. If the address is unaligned, we might start
15838 // reading up to (sizeof(vector)-1) bytes below the address of the
15839 // original unaligned load.
15840 MachineFunction &MF = DAG.getMachineFunction();
15841 MachineMemOperand *BaseMMO =
15842 MF.getMachineMemOperand(LD->getMemOperand(),
15843 -(int64_t)MemVT.getStoreSize()+1,
15844 2*MemVT.getStoreSize()-1);
15845
15846 // Create the new base load.
15847 SDValue LDXIntID =
15848 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15849 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15850 SDValue BaseLoad =
15851 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15852 DAG.getVTList(PermTy, MVT::Other),
15853 BaseLoadOps, LDTy, BaseMMO);
15854
15855 // Note that the value of IncOffset (which is provided to the next
15856 // load's pointer info offset value, and thus used to calculate the
15857 // alignment), and the value of IncValue (which is actually used to
15858 // increment the pointer value) are different! This is because we
15859 // require the next load to appear to be aligned, even though it
15860 // is actually offset from the base pointer by a lesser amount.
15861 int IncOffset = VT.getSizeInBits() / 8;
15862 int IncValue = IncOffset;
15863
15864 // Walk (both up and down) the chain looking for another load at the real
15865 // (aligned) offset (the alignment of the other load does not matter in
15866 // this case). If found, then do not use the offset reduction trick, as
15867 // that will prevent the loads from being later combined (as they would
15868 // otherwise be duplicates).
15869 if (!findConsecutiveLoad(LD, DAG))
15870 --IncValue;
15871
15872 SDValue Increment =
15873 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15874 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15875
15876 MachineMemOperand *ExtraMMO =
15877 MF.getMachineMemOperand(LD->getMemOperand(),
15878 1, 2*MemVT.getStoreSize()-1);
15879 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15880 SDValue ExtraLoad =
15881 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15882 DAG.getVTList(PermTy, MVT::Other),
15883 ExtraLoadOps, LDTy, ExtraMMO);
15884
15885 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15886 BaseLoad.getValue(1), ExtraLoad.getValue(1));
15887
15888 // Because vperm has a big-endian bias, we must reverse the order
15889 // of the input vectors and complement the permute control vector
15890 // when generating little endian code. We have already handled the
15891 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15892 // and ExtraLoad here.
15893 SDValue Perm;
15894 if (isLittleEndian)
15895 Perm = BuildIntrinsicOp(IntrPerm,
15896 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15897 else
15898 Perm = BuildIntrinsicOp(IntrPerm,
15899 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15900
15901 if (VT != PermTy)
15902 Perm = Subtarget.hasAltivec()
15903 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15904 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15905 DAG.getTargetConstant(1, dl, MVT::i64));
15906 // second argument is 1 because this rounding
15907 // is always exact.
15908
15909 // The output of the permutation is our loaded result, the TokenFactor is
15910 // our new chain.
15911 DCI.CombineTo(N, Perm, TF);
15912 return SDValue(N, 0);
15913 }
15914 }
15915 break;
15916 case ISD::INTRINSIC_WO_CHAIN: {
15917 bool isLittleEndian = Subtarget.isLittleEndian();
15918 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15919 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15920 : Intrinsic::ppc_altivec_lvsl);
15921 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15922 SDValue Add = N->getOperand(1);
15923
15924 int Bits = 4 /* 16 byte alignment */;
15925
15926 if (DAG.MaskedValueIsZero(Add->getOperand(1),
15927 APInt::getAllOnes(Bits /* alignment */)
15928 .zext(Add.getScalarValueSizeInBits()))) {
15929 SDNode *BasePtr = Add->getOperand(0).getNode();
15930 for (SDNode *U : BasePtr->uses()) {
15931 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15932 cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15933 // We've found another LVSL/LVSR, and this address is an aligned
15934 // multiple of that one. The results will be the same, so use the
15935 // one we've just found instead.
15936
15937 return SDValue(U, 0);
15938 }
15939 }
15940 }
15941
15942 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15943 SDNode *BasePtr = Add->getOperand(0).getNode();
15944 for (SDNode *U : BasePtr->uses()) {
15945 if (U->getOpcode() == ISD::ADD &&
15946 isa<ConstantSDNode>(U->getOperand(1)) &&
15947 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15948 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15949 (1ULL << Bits) ==
15950 0) {
15951 SDNode *OtherAdd = U;
15952 for (SDNode *V : OtherAdd->uses()) {
15953 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15954 cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15955 IID) {
15956 return SDValue(V, 0);
15957 }
15958 }
15959 }
15960 }
15961 }
15962 }
15963
15964 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15965 // Expose the vabsduw/h/b opportunity for downstream passes
15966 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15967 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15968 IID == Intrinsic::ppc_altivec_vmaxsh ||
15969 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15970 SDValue V1 = N->getOperand(1);
15971 SDValue V2 = N->getOperand(2);
15972 if ((V1.getSimpleValueType() == MVT::v4i32 ||
15973 V1.getSimpleValueType() == MVT::v8i16 ||
15974 V1.getSimpleValueType() == MVT::v16i8) &&
15975 V1.getSimpleValueType() == V2.getSimpleValueType()) {
15976 // (0-a, a)
15977 if (V1.getOpcode() == ISD::SUB &&
15978 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15979 V1.getOperand(1) == V2) {
15980 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15981 }
15982 // (a, 0-a)
15983 if (V2.getOpcode() == ISD::SUB &&
15984 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15985 V2.getOperand(1) == V1) {
15986 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15987 }
15988 // (x-y, y-x)
15989 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15990 V1.getOperand(0) == V2.getOperand(1) &&
15991 V1.getOperand(1) == V2.getOperand(0)) {
15992 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15993 }
15994 }
15995 }
15996 }
15997
15998 break;
15999 case ISD::INTRINSIC_W_CHAIN:
16000 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16001 default:
16002 break;
16003 case Intrinsic::ppc_altivec_vsum4sbs:
16004 case Intrinsic::ppc_altivec_vsum4shs:
16005 case Intrinsic::ppc_altivec_vsum4ubs: {
16006 // These sum-across intrinsics only have a chain due to the side effect
16007 // that they may set the SAT bit. If we know the SAT bit will not be set
16008 // for some inputs, we can replace any uses of their chain with the input
16009 // chain.
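// For example (sketch): in vsum4sbs(a, splat(0)), the partial sums of four
// i8 elements plus zero always fit in an i32, so the SAT bit stays clear and
// the chain dependency can be dropped.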
16010 if (BuildVectorSDNode *BVN =
16011 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16012 APInt APSplatBits, APSplatUndef;
16013 unsigned SplatBitSize;
16014 bool HasAnyUndefs;
16015 bool BVNIsConstantSplat = BVN->isConstantSplat(
16016 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16017 !Subtarget.isLittleEndian());
16018 // If the constant splat vector is 0, the SAT bit will not be set.
16019 if (BVNIsConstantSplat && APSplatBits == 0)
16020 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16021 }
16022 return SDValue();
16023 }
16024 case Intrinsic::ppc_vsx_lxvw4x:
16025 case Intrinsic::ppc_vsx_lxvd2x:
16026 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16027 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16028 if (Subtarget.needsSwapsForVSXMemOps())
16029 return expandVSXLoadForLE(N, DCI);
16030 break;
16031 }
16032 break;
16033 case ISD::INTRINSIC_VOID:
16034 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16035 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16036 if (Subtarget.needsSwapsForVSXMemOps()) {
16037 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16038 default:
16039 break;
16040 case Intrinsic::ppc_vsx_stxvw4x:
16041 case Intrinsic::ppc_vsx_stxvd2x:
16042 return expandVSXStoreForLE(N, DCI);
16043 }
16044 }
16045 break;
16046 case ISD::BSWAP: {
16047 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16048 // For subtargets without LDBRX, we can still do better than the default
16049 // expansion even for 64-bit BSWAP (LOAD).
16050 bool Is64BitBswapOn64BitTgt =
16051 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16052 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16053 N->getOperand(0).hasOneUse();
16054 if (IsSingleUseNormalLd &&
16055 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16056 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16057 SDValue Load = N->getOperand(0);
16058 LoadSDNode *LD = cast<LoadSDNode>(Load);
16059 // Create the byte-swapping load.
16060 SDValue Ops[] = {
16061 LD->getChain(), // Chain
16062 LD->getBasePtr(), // Ptr
16063 DAG.getValueType(N->getValueType(0)) // VT
16064 };
16065 SDValue BSLoad =
16066 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16067 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16068 MVT::i64 : MVT::i32, MVT::Other),
16069 Ops, LD->getMemoryVT(), LD->getMemOperand());
16070
16071 // If this is an i16 load, insert the truncate.
16072 SDValue ResVal = BSLoad;
16073 if (N->getValueType(0) == MVT::i16)
16074 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16075
16076 // First, combine the bswap away. This makes the value produced by the
16077 // load dead.
16078 DCI.CombineTo(N, ResVal);
16079
16080 // Next, combine the load away, we give it a bogus result value but a real
16081 // chain result. The result value is dead because the bswap is dead.
16082 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16083
16084 // Return N so it doesn't get rechecked!
16085 return SDValue(N, 0);
16086 }
16087 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16088 // before legalization so that the BUILD_PAIR is handled correctly.
16089 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16090 !IsSingleUseNormalLd)
16091 return SDValue();
16092 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16093
16094 // Can't split volatile or atomic loads.
16095 if (!LD->isSimple())
16096 return SDValue();
16097 SDValue BasePtr = LD->getBasePtr();
16098 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16099 LD->getPointerInfo(), LD->getAlign());
16100 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16101 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16102 DAG.getIntPtrConstant(4, dl));
16103 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16104 LD->getMemOperand(), 4, 4);
16105 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16106 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16107 SDValue Res;
16108 if (Subtarget.isLittleEndian())
16109 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16110 else
16111 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16112 SDValue TF =
16113 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16114 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16115 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16116 return Res;
16117 }
16118 case PPCISD::VCMP:
16119 // If a VCMP_rec node already exists with exactly the same operands as this
16120 // node, use its result instead of this node (VCMP_rec computes both a CR6
16121 // and a normal output).
16122 //
16123 if (!N->getOperand(0).hasOneUse() &&
16124 !N->getOperand(1).hasOneUse() &&
16125 !N->getOperand(2).hasOneUse()) {
16126
16127 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16128 SDNode *VCMPrecNode = nullptr;
16129
16130 SDNode *LHSN = N->getOperand(0).getNode();
16131 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16132 UI != E; ++UI)
16133 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16134 UI->getOperand(1) == N->getOperand(1) &&
16135 UI->getOperand(2) == N->getOperand(2) &&
16136 UI->getOperand(0) == N->getOperand(0)) {
16137 VCMPrecNode = *UI;
16138 break;
16139 }
16140
16141 // If there is no VCMP_rec node, or if the flag value has a single use,
16142 // don't transform this.
16143 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16144 break;
16145
16146 // Look at the (necessarily single) use of the flag value. If it has a
16147 // chain, this transformation is more complex. Note that multiple things
16148 // could use the value result, which we should ignore.
16149 SDNode *FlagUser = nullptr;
16150 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16151 FlagUser == nullptr; ++UI) {
16152 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16153 SDNode *User = *UI;
16154 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16155 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16156 FlagUser = User;
16157 break;
16158 }
16159 }
16160 }
16161
16162 // If the user is a MFOCRF instruction, we know this is safe.
16163 // Otherwise we give up for right now.
16164 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16165 return SDValue(VCMPrecNode, 0);
16166 }
16167 break;
16168 case ISD::BR_CC: {
16169 // If this is a branch on an altivec predicate comparison, lower this so
16170 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16171 // lowering is done pre-legalize, because the legalizer lowers the predicate
16172 // compare down to code that is difficult to reassemble.
16173 // This code also handles branches that depend on the result of a store
16174 // conditional.
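// For example (sketch): a br_cc on the i32 result of a vcmpequw_p predicate
// intrinsic can be lowered to a VCMP_rec ('dot' form) compare plus a branch
// on the corresponding CR6 bit, instead of copying CR6 to a GPR with mfocrf.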
16175 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16176 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16177
16178 int CompareOpc;
16179 bool isDot;
16180
16181 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16182 break;
16183
16184 // Since we are doing this pre-legalize, the RHS can be a constant of
16185 // arbitrary bitwidth which may cause issues when trying to get the value
16186 // from the underlying APInt.
16187 auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue();
16188 if (!RHSAPInt.isIntN(64))
16189 break;
16190
16191 unsigned Val = RHSAPInt.getZExtValue();
16192 auto isImpossibleCompare = [&]() {
16193 // If this is a comparison against something other than 0/1, then we know
16194 // that the condition is never/always true.
16195 if (Val != 0 && Val != 1) {
16196 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16197 return N->getOperand(0);
16198 // Always !=, turn it into an unconditional branch.
16199 return DAG.getNode(ISD::BR, dl, MVT::Other,
16200 N->getOperand(0), N->getOperand(4));
16201 }
16202 return SDValue();
16203 };
16204 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16205 unsigned StoreWidth = 0;
16206 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16207 isStoreConditional(LHS, StoreWidth)) {
16208 if (SDValue Impossible = isImpossibleCompare())
16209 return Impossible;
16210 PPC::Predicate CompOpc;
16211 // eq 0 => ne
16212 // ne 0 => eq
16213 // eq 1 => eq
16214 // ne 1 => ne
16215 if (Val == 0)
16216 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16217 else
16218 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16219
16220 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16221 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16222 auto *MemNode = cast<MemSDNode>(LHS);
16223 SDValue ConstSt = DAG.getMemIntrinsicNode(
16224 PPCISD::STORE_COND, dl,
16225 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16226 MemNode->getMemoryVT(), MemNode->getMemOperand());
16227
16228 SDValue InChain;
16229 // Unchain the branch from the original store conditional.
16230 if (N->getOperand(0) == LHS.getValue(1))
16231 InChain = LHS.getOperand(0);
16232 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16233 SmallVector<SDValue, 4> InChains;
16234 SDValue InTF = N->getOperand(0);
16235 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16236 if (InTF.getOperand(i) != LHS.getValue(1))
16237 InChains.push_back(InTF.getOperand(i));
16238 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16239 }
16240
16241 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16242 DAG.getConstant(CompOpc, dl, MVT::i32),
16243 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16244 ConstSt.getValue(2));
16245 }
16246
16247 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16248 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16249 assert(isDot && "Can't compare against a vector result!");
16250
16251 if (SDValue Impossible = isImpossibleCompare())
16252 return Impossible;
16253
16254 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16255 // Create the PPCISD altivec 'dot' comparison node.
16256 SDValue Ops[] = {
16257 LHS.getOperand(2), // LHS of compare
16258 LHS.getOperand(3), // RHS of compare
16259 DAG.getConstant(CompareOpc, dl, MVT::i32)
16260 };
16261 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16262 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16263
16264 // Unpack the result based on how the target uses it.
16265 PPC::Predicate CompOpc;
16266 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
16267 default: // Can't happen, don't crash on invalid number though.
16268 case 0: // Branch on the value of the EQ bit of CR6.
16269 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16270 break;
16271 case 1: // Branch on the inverted value of the EQ bit of CR6.
16272 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16273 break;
16274 case 2: // Branch on the value of the LT bit of CR6.
16275 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16276 break;
16277 case 3: // Branch on the inverted value of the LT bit of CR6.
16278 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16279 break;
16280 }
16281
16282 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16283 DAG.getConstant(CompOpc, dl, MVT::i32),
16284 DAG.getRegister(PPC::CR6, MVT::i32),
16285 N->getOperand(4), CompNode.getValue(1));
16286 }
16287 break;
16288 }
16289 case ISD::BUILD_VECTOR:
16290 return DAGCombineBuildVector(N, DCI);
16291 }
16292
16293 return SDValue();
16294}
16295
16296SDValue
16297 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16298 SelectionDAG &DAG,
16299 SmallVectorImpl<SDNode *> &Created) const {
16300 // fold (sdiv X, pow2)
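// For example (sketch): on a 64-bit subtarget, X sdiv 8 can be emitted as
//   sradi X, 3   ; arithmetic shift right, CA records whether bits were lost
//   addze ...    ; add the carry back in to round toward zero
// which is the pair the PPCISD::SRA_ADDZE node below expands to.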
16301 EVT VT = N->getValueType(0);
16302 if (VT == MVT::i64 && !Subtarget.isPPC64())
16303 return SDValue();
16304 if ((VT != MVT::i32 && VT != MVT::i64) ||
16305 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16306 return SDValue();
16307
16308 SDLoc DL(N);
16309 SDValue N0 = N->getOperand(0);
16310
16311 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16312 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16313 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16314
16315 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16316 Created.push_back(Op.getNode());
16317
16318 if (IsNegPow2) {
16319 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16320 Created.push_back(Op.getNode());
16321 }
16322
16323 return Op;
16324}
16325
16326//===----------------------------------------------------------------------===//
16327// Inline Assembly Support
16328//===----------------------------------------------------------------------===//
16329
16330 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16331 KnownBits &Known,
16332 const APInt &DemandedElts,
16333 const SelectionDAG &DAG,
16334 unsigned Depth) const {
16335 Known.resetAll();
16336 switch (Op.getOpcode()) {
16337 default: break;
16338 case PPCISD::LBRX: {
16339 // lhbrx is known to have the top bits cleared out.
16340 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16341 Known.Zero = 0xFFFF0000;
16342 break;
16343 }
16344 case ISD::INTRINSIC_WO_CHAIN: {
16345 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
16346 default: break;
16347 case Intrinsic::ppc_altivec_vcmpbfp_p:
16348 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16349 case Intrinsic::ppc_altivec_vcmpequb_p:
16350 case Intrinsic::ppc_altivec_vcmpequh_p:
16351 case Intrinsic::ppc_altivec_vcmpequw_p:
16352 case Intrinsic::ppc_altivec_vcmpequd_p:
16353 case Intrinsic::ppc_altivec_vcmpequq_p:
16354 case Intrinsic::ppc_altivec_vcmpgefp_p:
16355 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16356 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16357 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16358 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16359 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16360 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16361 case Intrinsic::ppc_altivec_vcmpgtub_p:
16362 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16363 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16364 case Intrinsic::ppc_altivec_vcmpgtud_p:
16365 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16366 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16367 break;
16368 }
16369 break;
16370 }
16371 case ISD::INTRINSIC_W_CHAIN: {
16372 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
16373 default:
16374 break;
16375 case Intrinsic::ppc_load2r:
16376 // Top bits are cleared for load2r (which is the same as lhbrx).
16377 Known.Zero = 0xFFFF0000;
16378 break;
16379 }
16380 break;
16381 }
16382 }
16383}
16384
16385 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16386 switch (Subtarget.getCPUDirective()) {
16387 default: break;
16388 case PPC::DIR_970:
16389 case PPC::DIR_PWR4:
16390 case PPC::DIR_PWR5:
16391 case PPC::DIR_PWR5X:
16392 case PPC::DIR_PWR6:
16393 case PPC::DIR_PWR6X:
16394 case PPC::DIR_PWR7:
16395 case PPC::DIR_PWR8:
16396 case PPC::DIR_PWR9:
16397 case PPC::DIR_PWR10:
16398 case PPC::DIR_PWR_FUTURE: {
16399 if (!ML)
16400 break;
16401
16402 if (!DisableInnermostLoopAlign32) {
16403 // If the nested loop is an innermost loop, prefer a 32-byte alignment
16404 // to reduce cache misses and branch-prediction misses.
16405 // Actual alignment of the loop will depend on the hotness check and other
16406 // logic in alignBlocks.
16407 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16408 return Align(32);
16409 }
16410
16411 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16412
16413 // For small loops (between 5 and 8 instructions), align to a 32-byte
16414 // boundary so that the entire loop fits in one instruction-cache line.
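// Since PPC instructions are 4 bytes each, the (16, 32] byte window checked
// below corresponds to loops of 5 to 8 instructions.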
16415 uint64_t LoopSize = 0;
16416 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16417 for (const MachineInstr &J : **I) {
16418 LoopSize += TII->getInstSizeInBytes(J);
16419 if (LoopSize > 32)
16420 break;
16421 }
16422
16423 if (LoopSize > 16 && LoopSize <= 32)
16424 return Align(32);
16425
16426 break;
16427 }
16428 }
16429
16430 return TargetLowering::getPrefLoopAlignment(ML);
16431}
16432
16433/// getConstraintType - Given a constraint, return the type of
16434/// constraint it is for this target.
16435 PPCTargetLowering::ConstraintType
16436 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16437 if (Constraint.size() == 1) {
16438 switch (Constraint[0]) {
16439 default: break;
16440 case 'b':
16441 case 'r':
16442 case 'f':
16443 case 'd':
16444 case 'v':
16445 case 'y':
16446 return C_RegisterClass;
16447 case 'Z':
16448 // FIXME: While Z does indicate a memory constraint, it specifically
16449 // indicates an r+r address (used in conjunction with the 'y' modifier
16450 // in the replacement string). Currently, we're forcing the base
16451 // register to be r0 in the asm printer (which is interpreted as zero)
16452 // and forming the complete address in the second register. This is
16453 // suboptimal.
16454 return C_Memory;
16455 }
16456 } else if (Constraint == "wc") { // individual CR bits.
16457 return C_RegisterClass;
16458 } else if (Constraint == "wa" || Constraint == "wd" ||
16459 Constraint == "wf" || Constraint == "ws" ||
16460 Constraint == "wi" || Constraint == "ww") {
16461 return C_RegisterClass; // VSX registers.
16462 }
16463 return TargetLowering::getConstraintType(Constraint);
16464}
16465
16466/// Examine constraint type and operand type and determine a weight value.
16467/// This object must already have been set up with the operand type
16468/// and the current alternative constraint selected.
16469 TargetLowering::ConstraintWeight
16470 PPCTargetLowering::getSingleConstraintMatchWeight(
16471 AsmOperandInfo &info, const char *constraint) const {
16472 ConstraintWeight weight = CW_Invalid;
16473 Value *CallOperandVal = info.CallOperandVal;
16474 // If we don't have a value, we can't do a match,
16475 // but allow it at the lowest weight.
16476 if (!CallOperandVal)
16477 return CW_Default;
16478 Type *type = CallOperandVal->getType();
16479
16480 // Look at the constraint type.
16481 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16482 return CW_Register; // an individual CR bit.
16483 else if ((StringRef(constraint) == "wa" ||
16484 StringRef(constraint) == "wd" ||
16485 StringRef(constraint) == "wf") &&
16486 type->isVectorTy())
16487 return CW_Register;
16488 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16489 return CW_Register; // just holds 64-bit integer data.
16490 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16491 return CW_Register;
16492 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16493 return CW_Register;
16494
16495 switch (*constraint) {
16496 default:
16497 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16498 break;
16499 case 'b':
16500 if (type->isIntegerTy())
16501 weight = CW_Register;
16502 break;
16503 case 'f':
16504 if (type->isFloatTy())
16505 weight = CW_Register;
16506 break;
16507 case 'd':
16508 if (type->isDoubleTy())
16509 weight = CW_Register;
16510 break;
16511 case 'v':
16512 if (type->isVectorTy())
16513 weight = CW_Register;
16514 break;
16515 case 'y':
16516 weight = CW_Register;
16517 break;
16518 case 'Z':
16519 weight = CW_Memory;
16520 break;
16521 }
16522 return weight;
16523}
16524
16525std::pair<unsigned, const TargetRegisterClass *>
16526 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16527 StringRef Constraint,
16528 MVT VT) const {
16529 if (Constraint.size() == 1) {
16530 // GCC RS6000 Constraint Letters
16531 switch (Constraint[0]) {
16532 case 'b': // R1-R31
16533 if (VT == MVT::i64 && Subtarget.isPPC64())
16534 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16535 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16536 case 'r': // R0-R31
16537 if (VT == MVT::i64 && Subtarget.isPPC64())
16538 return std::make_pair(0U, &PPC::G8RCRegClass);
16539 return std::make_pair(0U, &PPC::GPRCRegClass);
16540 // 'd' and 'f' constraints are both defined to be "the floating point
16541 // registers", where one is for 32-bit and the other for 64-bit. We don't
16542 // really care overly much here so just give them all the same reg classes.
16543 case 'd':
16544 case 'f':
16545 if (Subtarget.hasSPE()) {
16546 if (VT == MVT::f32 || VT == MVT::i32)
16547 return std::make_pair(0U, &PPC::GPRCRegClass);
16548 if (VT == MVT::f64 || VT == MVT::i64)
16549 return std::make_pair(0U, &PPC::SPERCRegClass);
16550 } else {
16551 if (VT == MVT::f32 || VT == MVT::i32)
16552 return std::make_pair(0U, &PPC::F4RCRegClass);
16553 if (VT == MVT::f64 || VT == MVT::i64)
16554 return std::make_pair(0U, &PPC::F8RCRegClass);
16555 }
16556 break;
16557 case 'v':
16558 if (Subtarget.hasAltivec() && VT.isVector())
16559 return std::make_pair(0U, &PPC::VRRCRegClass);
16560 else if (Subtarget.hasVSX())
16561 // Scalars in Altivec registers only make sense with VSX.
16562 return std::make_pair(0U, &PPC::VFRCRegClass);
16563 break;
16564 case 'y': // crrc
16565 return std::make_pair(0U, &PPC::CRRCRegClass);
16566 }
16567 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16568 // An individual CR bit.
16569 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16570 } else if ((Constraint == "wa" || Constraint == "wd" ||
16571 Constraint == "wf" || Constraint == "wi") &&
16572 Subtarget.hasVSX()) {
16573 // A VSX register for either a scalar (FP) or vector. There is no
16574 // support for single precision scalars on subtargets prior to Power8.
16575 if (VT.isVector())
16576 return std::make_pair(0U, &PPC::VSRCRegClass);
16577 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16578 return std::make_pair(0U, &PPC::VSSRCRegClass);
16579 return std::make_pair(0U, &PPC::VSFRCRegClass);
16580 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16581 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16582 return std::make_pair(0U, &PPC::VSSRCRegClass);
16583 else
16584 return std::make_pair(0U, &PPC::VSFRCRegClass);
16585 } else if (Constraint == "lr") {
16586 if (VT == MVT::i64)
16587 return std::make_pair(0U, &PPC::LR8RCRegClass);
16588 else
16589 return std::make_pair(0U, &PPC::LRRCRegClass);
16590 }
16591
16592 // Handle special cases of physical registers that are not properly handled
16593 // by the base class.
16594 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16595 // If we name a VSX register, we can't defer to the base class because it
16596 // will not recognize the correct register (their names will be VSL{0-31}
16597 // and V{0-31} so they won't match). So we match them here.
16598 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16599 int VSNum = atoi(Constraint.data() + 3);
16600 assert(VSNum >= 0 && VSNum <= 63 &&
16601 "Attempted to access a vsr out of range");
16602 if (VSNum < 32)
16603 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16604 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16605 }
16606
16607 // For float registers, we can't defer to the base class as it will match
16608 // the SPILLTOVSRRC class.
16609 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16610 int RegNum = atoi(Constraint.data() + 2);
16611 if (RegNum > 31 || RegNum < 0)
16612 report_fatal_error("Invalid floating point register number");
16613 if (VT == MVT::f32 || VT == MVT::i32)
16614 return Subtarget.hasSPE()
16615 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16616 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16617 if (VT == MVT::f64 || VT == MVT::i64)
16618 return Subtarget.hasSPE()
16619 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16620 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16621 }
16622 }
16623
16624 std::pair<unsigned, const TargetRegisterClass *> R =
16625 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16626
16627 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16628 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16629 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16630 // register.
16631 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16632 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16633 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16634 PPC::GPRCRegClass.contains(R.first))
16635 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16636 PPC::sub_32, &PPC::G8RCRegClass),
16637 &PPC::G8RCRegClass);
16638
16639 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16640 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16641 R.first = PPC::CR0;
16642 R.second = &PPC::CRRCRegClass;
16643 }
16644 // FIXME: This warning should ideally be emitted in the front end.
16645 const auto &TM = getTargetMachine();
16646 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16647 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16648 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16649 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16650 errs() << "warning: vector registers 20 to 31 are reserved in the "
16651 "default AIX AltiVec ABI and cannot be used\n";
16652 }
16653
16654 return R;
16655}
16656
16657/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16658/// vector. If it is invalid, don't add anything to Ops.
16659 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16660 std::string &Constraint,
16661 std::vector<SDValue>&Ops,
16662 SelectionDAG &DAG) const {
16663 SDValue Result;
16664
16665 // Only support length 1 constraints.
16666 if (Constraint.length() > 1) return;
16667
16668 char Letter = Constraint[0];
16669 switch (Letter) {
16670 default: break;
16671 case 'I':
16672 case 'J':
16673 case 'K':
16674 case 'L':
16675 case 'M':
16676 case 'N':
16677 case 'O':
16678 case 'P': {
16679 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16680 if (!CST) return; // Must be an immediate to match.
16681 SDLoc dl(Op);
16682 int64_t Value = CST->getSExtValue();
16683 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16684 // numbers are printed as such.
16685 switch (Letter) {
16686 default: llvm_unreachable("Unknown constraint letter!");
16687 case 'I': // "I" is a signed 16-bit constant.
16688 if (isInt<16>(Value))
16689 Result = DAG.getTargetConstant(Value, dl, TCVT);
16690 break;
16691 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16692 if (isShiftedUInt<16, 16>(Value))
16693 Result = DAG.getTargetConstant(Value, dl, TCVT);
16694 break;
16695 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16696 if (isShiftedInt<16, 16>(Value))
16697 Result = DAG.getTargetConstant(Value, dl, TCVT);
16698 break;
16699 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16700 if (isUInt<16>(Value))
16701 Result = DAG.getTargetConstant(Value, dl, TCVT);
16702 break;
16703 case 'M': // "M" is a constant that is greater than 31.
16704 if (Value > 31)
16705 Result = DAG.getTargetConstant(Value, dl, TCVT);
16706 break;
16707 case 'N': // "N" is a positive constant that is an exact power of two.
16708 if (Value > 0 && isPowerOf2_64(Value))
16709 Result = DAG.getTargetConstant(Value, dl, TCVT);
16710 break;
16711 case 'O': // "O" is the constant zero.
16712 if (Value == 0)
16713 Result = DAG.getTargetConstant(Value, dl, TCVT);
16714 break;
16715 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16716 if (isInt<16>(-Value))
16717 Result = DAG.getTargetConstant(Value, dl, TCVT);
16718 break;
16719 }
16720 break;
16721 }
16722 }
16723
16724 if (Result.getNode()) {
16725 Ops.push_back(Result);
16726 return;
16727 }
16728
16729 // Handle standard constraint letters.
16730 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16731}
16732
16733 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
16734 SmallVectorImpl<SDValue> &Ops,
16735 SelectionDAG &DAG) const {
16736 if (I.getNumOperands() <= 1)
16737 return;
16738 if (!isa<ConstantSDNode>(Ops[1].getNode()))
16739 return;
16740 auto IntrinsicID = cast<ConstantSDNode>(Ops[1].getNode())->getZExtValue();
16741 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
16742 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
16743 return;
16744
16745 if (I.hasMetadata("annotation")) {
16746 MDNode *MDN = I.getMetadata("annotation");
16747 Ops.push_back(DAG.getMDNode(MDN));
16748 }
16749}
16750
16751// isLegalAddressingMode - Return true if the addressing mode represented
16752// by AM is legal for this target, for a load/store of the specified type.
16753 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
16754 const AddrMode &AM, Type *Ty,
16755 unsigned AS,
16756 Instruction *I) const {
16757 // The vector-type r+i form is supported as the DQ form since Power9. We don't
16758 // check that the offset meets the DQ-form requirement (off % 16 == 0), because
16759 // on PowerPC the immediate form is preferred and the offset can be adjusted to
16760 // use it later, in the PPCLoopInstrFormPrep pass. Also, since LSR uses the min
16761 // and max offsets of an LSRUse to check for a legal addressing mode, we should
16762 // be a little aggressive here and accept other offsets for that LSRUse.
16763 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16764 return false;
16765
16766 // PPC allows a sign-extended 16-bit immediate field.
16767 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16768 return false;
16769
16770 // No global is ever allowed as a base.
16771 if (AM.BaseGV)
16772 return false;
16773
16774 // PPC only supports r+r:
16775 switch (AM.Scale) {
16776 case 0: // "r+i" or just "i", depending on HasBaseReg.
16777 break;
16778 case 1:
16779 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
16780 return false;
16781 // Otherwise we have r+r or r+i.
16782 break;
16783 case 2:
16784 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
16785 return false;
16786 // Allow 2*r as r+r.
16787 break;
16788 default:
16789 // No other scales are supported.
16790 return false;
16791 }
16792
16793 return true;
16794}
16795
16796SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
16797 SelectionDAG &DAG) const {
16798 MachineFunction &MF = DAG.getMachineFunction();
16799 MachineFrameInfo &MFI = MF.getFrameInfo();
16800 MFI.setReturnAddressIsTaken(true);
16801
16802 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
16803 return SDValue();
16804
16805 SDLoc dl(Op);
16806 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16807
16808 // Make sure the function does not optimize away the store of the RA to
16809 // the stack.
16810 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
16811 FuncInfo->setLRStoreRequired();
16812 bool isPPC64 = Subtarget.isPPC64();
16813 auto PtrVT = getPointerTy(MF.getDataLayout());
16814
16815 if (Depth > 0) {
16816 // The link register (return address) is saved in the caller's frame,
16817 // not the callee's stack frame. So we must get the caller's frame
16818 // address and load the return address at the LR offset from there.
16819 SDValue FrameAddr =
16820 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16821 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
16822 SDValue Offset =
16823 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
16824 isPPC64 ? MVT::i64 : MVT::i32);
16825 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
16826 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
16827 MachinePointerInfo());
16828 }
16829
16830 // Just load the return address off the stack.
16831 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16832 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
16833 MachinePointerInfo());
16834}
16835
16836SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16837 SelectionDAG &DAG) const {
16838 SDLoc dl(Op);
16839 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16840
16841 MachineFunction &MF = DAG.getMachineFunction();
16842 MachineFrameInfo &MFI = MF.getFrameInfo();
16843 MFI.setFrameAddressIsTaken(true);
16844
16845 EVT PtrVT = getPointerTy(MF.getDataLayout());
16846 bool isPPC64 = PtrVT == MVT::i64;
16847
16848 // Naked functions never have a frame pointer, and so we use r1. For all
16849 // other functions, this decision must be delayed until during PEI.
16850 unsigned FrameReg;
16851 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16852 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16853 else
16854 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16855
16856 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16857 PtrVT);
16858 while (Depth--)
16859 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16860 FrameAddr, MachinePointerInfo());
16861 return FrameAddr;
16862}
16863
16864// FIXME? Maybe this could be a TableGen attribute on some registers and
16865// this table could be generated automatically from RegInfo.
16866 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
16867 const MachineFunction &MF) const {
16868 bool isPPC64 = Subtarget.isPPC64();
16869
16870 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16871 if (!is64Bit && VT != LLT::scalar(32))
16872 report_fatal_error("Invalid register global variable type");
16873
16874 Register Reg = StringSwitch<Register>(RegName)
16875 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16876 .Case("r2", isPPC64 ? Register() : PPC::R2)
16877 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16878 .Default(Register());
16879
16880 if (Reg)
16881 return Reg;
16882 report_fatal_error("Invalid register name global variable");
16883}
16884
16885 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
16886 // The 32-bit SVR4 ABI accesses everything as got-indirect.
16887 if (Subtarget.is32BitELFABI())
16888 return true;
16889
16890 // AIX accesses everything indirectly through the TOC, which is similar to
16891 // the GOT.
16892 if (Subtarget.isAIXABI())
16893 return true;
16894
16895 CodeModel::Model CModel = getTargetMachine().getCodeModel();
16896 // If it is small or large code model, module locals are accessed
16897 // indirectly by loading their address from .toc/.got.
16898 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16899 return true;
16900
16901 // JumpTable and BlockAddress are accessed as got-indirect.
16902 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16903 return true;
16904
16905 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16906 return Subtarget.isGVIndirectSymbol(G->getGlobal());
16907
16908 return false;
16909}
16910
16911bool
16912 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16913 // The PowerPC target isn't yet aware of offsets.
16914 return false;
16915}
16916
16917 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16918 const CallInst &I,
16919 MachineFunction &MF,
16920 unsigned Intrinsic) const {
16921 switch (Intrinsic) {
16922 case Intrinsic::ppc_atomicrmw_xchg_i128:
16923 case Intrinsic::ppc_atomicrmw_add_i128:
16924 case Intrinsic::ppc_atomicrmw_sub_i128:
16925 case Intrinsic::ppc_atomicrmw_nand_i128:
16926 case Intrinsic::ppc_atomicrmw_and_i128:
16927 case Intrinsic::ppc_atomicrmw_or_i128:
16928 case Intrinsic::ppc_atomicrmw_xor_i128:
16929 case Intrinsic::ppc_cmpxchg_i128:
16930 Info.opc = ISD::INTRINSIC_W_CHAIN;
16931 Info.memVT = MVT::i128;
16932 Info.ptrVal = I.getArgOperand(0);
16933 Info.offset = 0;
16934 Info.align = Align(16);
16935 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
16936 MachineMemOperand::MOVolatile;
16937 return true;
16938 case Intrinsic::ppc_atomic_load_i128:
16939 Info.opc = ISD::INTRINSIC_W_CHAIN;
16940 Info.memVT = MVT::i128;
16941 Info.ptrVal = I.getArgOperand(0);
16942 Info.offset = 0;
16943 Info.align = Align(16);
16944 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
16945 return true;
16946 case Intrinsic::ppc_atomic_store_i128:
16947 Info.opc = ISD::INTRINSIC_VOID;
16948 Info.memVT = MVT::i128;
16949 Info.ptrVal = I.getArgOperand(2);
16950 Info.offset = 0;
16951 Info.align = Align(16);
16952 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
16953 return true;
16954 case Intrinsic::ppc_altivec_lvx:
16955 case Intrinsic::ppc_altivec_lvxl:
16956 case Intrinsic::ppc_altivec_lvebx:
16957 case Intrinsic::ppc_altivec_lvehx:
16958 case Intrinsic::ppc_altivec_lvewx:
16959 case Intrinsic::ppc_vsx_lxvd2x:
16960 case Intrinsic::ppc_vsx_lxvw4x:
16961 case Intrinsic::ppc_vsx_lxvd2x_be:
16962 case Intrinsic::ppc_vsx_lxvw4x_be:
16963 case Intrinsic::ppc_vsx_lxvl:
16964 case Intrinsic::ppc_vsx_lxvll: {
16965 EVT VT;
16966 switch (Intrinsic) {
16967 case Intrinsic::ppc_altivec_lvebx:
16968 VT = MVT::i8;
16969 break;
16970 case Intrinsic::ppc_altivec_lvehx:
16971 VT = MVT::i16;
16972 break;
16973 case Intrinsic::ppc_altivec_lvewx:
16974 VT = MVT::i32;
16975 break;
16976 case Intrinsic::ppc_vsx_lxvd2x:
16977 case Intrinsic::ppc_vsx_lxvd2x_be:
16978 VT = MVT::v2f64;
16979 break;
16980 default:
16981 VT = MVT::v4i32;
16982 break;
16983 }
16984
16985 Info.opc = ISD::INTRINSIC_W_CHAIN;
16986 Info.memVT = VT;
16987 Info.ptrVal = I.getArgOperand(0);
16988 Info.offset = -VT.getStoreSize()+1;
16989 Info.size = 2*VT.getStoreSize()-1;
16990 Info.align = Align(1);
16991 Info.flags = MachineMemOperand::MOLoad;
16992 return true;
16993 }
16994 case Intrinsic::ppc_altivec_stvx:
16995 case Intrinsic::ppc_altivec_stvxl:
16996 case Intrinsic::ppc_altivec_stvebx:
16997 case Intrinsic::ppc_altivec_stvehx:
16998 case Intrinsic::ppc_altivec_stvewx:
16999 case Intrinsic::ppc_vsx_stxvd2x:
17000 case Intrinsic::ppc_vsx_stxvw4x:
17001 case Intrinsic::ppc_vsx_stxvd2x_be:
17002 case Intrinsic::ppc_vsx_stxvw4x_be:
17003 case Intrinsic::ppc_vsx_stxvl:
17004 case Intrinsic::ppc_vsx_stxvll: {
17005 EVT VT;
17006 switch (Intrinsic) {
17007 case Intrinsic::ppc_altivec_stvebx:
17008 VT = MVT::i8;
17009 break;
17010 case Intrinsic::ppc_altivec_stvehx:
17011 VT = MVT::i16;
17012 break;
17013 case Intrinsic::ppc_altivec_stvewx:
17014 VT = MVT::i32;
17015 break;
17016 case Intrinsic::ppc_vsx_stxvd2x:
17017 case Intrinsic::ppc_vsx_stxvd2x_be:
17018 VT = MVT::v2f64;
17019 break;
17020 default:
17021 VT = MVT::v4i32;
17022 break;
17023 }
17024
17025 Info.opc = ISD::INTRINSIC_VOID;
17026 Info.memVT = VT;
17027 Info.ptrVal = I.getArgOperand(1);
17028 Info.offset = -VT.getStoreSize()+1;
17029 Info.size = 2*VT.getStoreSize()-1;
17030 Info.align = Align(1);
17031 Info.flags = MachineMemOperand::MOStore;
17032 return true;
17033 }
17034 case Intrinsic::ppc_stdcx:
17035 case Intrinsic::ppc_stwcx:
17036 case Intrinsic::ppc_sthcx:
17037 case Intrinsic::ppc_stbcx: {
17038 EVT VT;
17039 auto Alignment = Align(8);
17040 switch (Intrinsic) {
17041 case Intrinsic::ppc_stdcx:
17042 VT = MVT::i64;
17043 break;
17044 case Intrinsic::ppc_stwcx:
17045 VT = MVT::i32;
17046 Alignment = Align(4);
17047 break;
17048 case Intrinsic::ppc_sthcx:
17049 VT = MVT::i16;
17050 Alignment = Align(2);
17051 break;
17052 case Intrinsic::ppc_stbcx:
17053 VT = MVT::i8;
17054 Alignment = Align(1);
17055 break;
17056 }
17057 Info.opc = ISD::INTRINSIC_W_CHAIN;
17058 Info.memVT = VT;
17059 Info.ptrVal = I.getArgOperand(0);
17060 Info.offset = 0;
17061 Info.align = Alignment;
17062 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17063 return true;
17064 }
17065 default:
17066 break;
17067 }
17068
17069 return false;
17070}
17071
17072/// It returns EVT::Other if the type should be determined using generic
17073/// target-independent logic.
17074 EVT PPCTargetLowering::getOptimalMemOpType(
17075 const MemOp &Op, const AttributeList &FuncAttributes) const {
17076 if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
17077 // We should use Altivec/VSX loads and stores when available. For unaligned
17078 // addresses, unaligned VSX loads are only fast starting with the P8.
17079 if (Subtarget.hasAltivec() && Op.size() >= 16 &&
17080 (Op.isAligned(Align(16)) ||
17081 ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
17082 return MVT::v4i32;
17083 }
17084
17085 if (Subtarget.isPPC64()) {
17086 return MVT::i64;
17087 }
17088
17089 return MVT::i32;
17090}
17091
17092/// Returns true if it is beneficial to convert a load of a constant
17093/// to just the constant itself.
17094 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17095 Type *Ty) const {
17096 assert(Ty->isIntegerTy());
17097
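// Any integer of at most 64 bits can be materialized in a few instructions
// (e.g. li/lis/ori/oris, plus a rotate for 64-bit values), which is
// generally cheaper than a load from the constant pool.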
17098 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17099 return !(BitSize == 0 || BitSize > 64);
17100}
17101
17102 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17103 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17104 return false;
17105 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17106 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17107 return NumBits1 == 64 && NumBits2 == 32;
17108}
17109
17110 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17111 if (!VT1.isInteger() || !VT2.isInteger())
17112 return false;
17113 unsigned NumBits1 = VT1.getSizeInBits();
17114 unsigned NumBits2 = VT2.getSizeInBits();
17115 return NumBits1 == 64 && NumBits2 == 32;
17116}
17117
17118 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17119 // Generally speaking, zexts are not free, but they are free when they can be
17120 // folded with other operations.
17121 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17122 EVT MemVT = LD->getMemoryVT();
17123 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17124 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17125 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17126 LD->getExtensionType() == ISD::ZEXTLOAD))
17127 return true;
17128 }
17129
17130 // FIXME: Add other cases...
17131 // - 32-bit shifts with a zext to i64
17132 // - zext after ctlz, bswap, etc.
17133 // - zext after and by a constant mask
17134
17135 return TargetLowering::isZExtFree(Val, VT2);
17136}
17137
17138 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17139 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17140 "invalid fpext types");
17141 // Extending to float128 is not free.
17142 if (DestVT == MVT::f128)
17143 return false;
17144 return true;
17145}
17146
17147 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17148 return isInt<16>(Imm) || isUInt<16>(Imm);
17149}
17150
17151 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17152 return isInt<16>(Imm) || isUInt<16>(Imm);
17153}
17154
17155 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17156 MachineMemOperand::Flags,
17157 unsigned *Fast) const {
17158 if (DisablePPCUnaligned)
17159 return false;
17160
17161 // PowerPC supports unaligned memory access for simple non-vector types.
17162 // Although accessing unaligned addresses is not as efficient as accessing
17163 // aligned addresses, it is generally more efficient than manual expansion,
17164 // and generally only traps for software emulation when crossing page
17165 // boundaries.
17166
17167 if (!VT.isSimple())
17168 return false;
17169
17170 if (VT.isFloatingPoint() && !VT.isVector() &&
17171 !Subtarget.allowsUnalignedFPAccess())
17172 return false;
17173
17174 if (VT.getSimpleVT().isVector()) {
17175 if (Subtarget.hasVSX()) {
17176 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17177 VT != MVT::v4f32 && VT != MVT::v4i32)
17178 return false;
17179 } else {
17180 return false;
17181 }
17182 }
17183
17184 if (VT == MVT::ppcf128)
17185 return false;
17186
17187 if (Fast)
17188 *Fast = 1;
17189
17190 return true;
17191}
17192
17193 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17194 SDValue C) const {
17195 // Check integral scalar types.
17196 if (!VT.isScalarInteger())
17197 return false;
17198 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17199 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17200 return false;
17201 // This transformation generates at least two operations, but the following
17202 // cases need at most two instructions during ISel, so exclude them.
17203 // 1. If the constant multiplier fits in 16 bits, it can be handled by one
17204 // HW instruction, i.e. MULLI.
17205 // 2. If the multiplier, once shifted right, fits in 16 bits, one extra
17206 // shift instruction is needed compared to case 1, i.e. MULLI and RLDICR.
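// For example, a multiply by 0x50000 (= 5 << 16) can be emitted as MULLI by
// 5 followed by a 16-bit left shift (RLDICR), so it is also excluded here.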
17207 int64_t Imm = ConstNode->getSExtValue();
17208 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17209 Imm >>= Shift;
17210 if (isInt<16>(Imm))
17211 return false;
17212 uint64_t UImm = static_cast<uint64_t>(Imm);
17213 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17214 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17215 return true;
17216 }
17217 return false;
17218}
17219
17220 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17221 EVT VT) const {
17222 return isFMAFasterThanFMulAndFAdd(
17223 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17224 }
17225
17226 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17227 Type *Ty) const {
17228 if (Subtarget.hasSPE())
17229 return false;
17230 switch (Ty->getScalarType()->getTypeID()) {
17231 case Type::FloatTyID:
17232 case Type::DoubleTyID:
17233 return true;
17234 case Type::FP128TyID:
17235 return Subtarget.hasP9Vector();
17236 default:
17237 return false;
17238 }
17239}
17240
17241// FIXME: add more patterns which are not profitable to hoist.
17242 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17243 if (!I->hasOneUse())
17244 return true;
17245
17246 Instruction *User = I->user_back();
17247 assert(User && "A single use instruction with no uses.");
17248
17249 switch (I->getOpcode()) {
17250 case Instruction::FMul: {
17251 // Don't break FMA, PowerPC prefers FMA.
17252 if (User->getOpcode() != Instruction::FSub &&
17253 User->getOpcode() != Instruction::FAdd)
17254 return true;
17255
17256 const TargetOptions &Options = getTargetMachine().Options;
17257 const Function *F = I->getFunction();
17258 const DataLayout &DL = F->getParent()->getDataLayout();
17259 Type *Ty = User->getOperand(0)->getType();
17260
17261 return !(
17262 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17263 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17264 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17265 }
17266 case Instruction::Load: {
17267 // Don't break the "store (load float*)" pattern; it will be combined
17268 // into "store (load int32)" by a later InstCombine pass. See function
17269 // combineLoadToOperationType. On PowerPC, loading a floating-point value
17270 // takes more cycles than loading a 32-bit integer.
17271 LoadInst *LI = cast<LoadInst>(I);
17272 // For the loads that combineLoadToOperationType does nothing, like
17273 // ordered load, it should be profitable to hoist them.
17274 // For swifterror load, it can only be used for pointer to pointer type, so
17275 // later type check should get rid of this case.
17276 if (!LI->isUnordered())
17277 return true;
17278
17279 if (User->getOpcode() != Instruction::Store)
17280 return true;
17281
17282 if (I->getType()->getTypeID() != Type::FloatTyID)
17283 return true;
17284
17285 return false;
17286 }
17287 default:
17288 return true;
17289 }
17290 return true;
17291}
17292
17293const MCPhysReg *
17294 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17295 // LR is a callee-save register, but we must treat it as clobbered by any call
17296 // site. Hence we include LR in the scratch registers, which are in turn added
17297 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17298 // to CTR, which is used by any indirect call.
17299 static const MCPhysReg ScratchRegs[] = {
17300 PPC::X12, PPC::LR8, PPC::CTR8, 0
17301 };
17302
17303 return ScratchRegs;
17304}
17305
17306 Register PPCTargetLowering::getExceptionPointerRegister(
17307 const Constant *PersonalityFn) const {
17308 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17309}
17310
17311 Register PPCTargetLowering::getExceptionSelectorRegister(
17312 const Constant *PersonalityFn) const {
17313 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17314}
17315
17316bool
17317 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17318 EVT VT , unsigned DefinedValues) const {
17319 if (VT == MVT::v2i64)
17320 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17321
17322 if (Subtarget.hasVSX())
17323 return true;
17324
17325 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17326}
17327
17328 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17329 if (DisableILPPref || Subtarget.enableMachineScheduler())
17330 return TargetLowering::getSchedulingPreference(N);
17331
17332 return Sched::ILP;
17333 }
17334
17335// Create a fast isel object.
17336FastISel *
17337 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17338 const TargetLibraryInfo *LibInfo) const {
17339 return PPC::createFastISel(FuncInfo, LibInfo);
17340}
17341
17342// 'Inverted' means the FMA opcode after negating one multiplicand.
17343// For example, (fma -a b c) = (fnmsub a b c)
17344static unsigned invertFMAOpcode(unsigned Opc) {
17345 switch (Opc) {
17346 default:
17347 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17348 case ISD::FMA:
17349 return PPCISD::FNMSUB;
17350 case PPCISD::FNMSUB:
17351 return ISD::FMA;
17352 }
17353}
17354
17355 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17356 bool LegalOps, bool OptForSize,
17357 NegatibleCost &Cost,
17358 unsigned Depth) const {
17359 if (Depth > SelectionDAG::MaxRecursionDepth)
17360 return SDValue();
17361
17362 unsigned Opc = Op.getOpcode();
17363 EVT VT = Op.getValueType();
17364 SDNodeFlags Flags = Op.getNode()->getFlags();
17365
17366 switch (Opc) {
17367 case PPCISD::FNMSUB:
17368 if (!Op.hasOneUse() || !isTypeLegal(VT))
17369 break;
17370
17371 const TargetOptions &Options = getTargetMachine().Options;
17372 SDValue N0 = Op.getOperand(0);
17373 SDValue N1 = Op.getOperand(1);
17374 SDValue N2 = Op.getOperand(2);
17375 SDLoc Loc(Op);
17376
17377 NegatibleCost N2Cost = NegatibleCost::Expensive;
17378 SDValue NegN2 =
17379 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17380
17381 if (!NegN2)
17382 return SDValue();
17383
17384 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17385 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17386 // These transformations may change sign of zeroes. For example,
17387 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17388 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17389 // Try and choose the cheaper one to negate.
17390 NegatibleCost N0Cost = NegatibleCost::Expensive;
17391 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17392 N0Cost, Depth + 1);
17393
17394 NegatibleCost N1Cost = NegatibleCost::Expensive;
17395 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17396 N1Cost, Depth + 1);
17397
17398 if (NegN0 && N0Cost <= N1Cost) {
17399 Cost = std::min(N0Cost, N2Cost);
17400 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17401 } else if (NegN1) {
17402 Cost = std::min(N1Cost, N2Cost);
17403 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17404 }
17405 }
17406
17407 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17408 if (isOperationLegal(ISD::FMA, VT)) {
17409 Cost = N2Cost;
17410 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17411 }
17412
17413 break;
17414 }
17415
17416 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17417 Cost, Depth);
17418}
17419
17420// Override to enable LOAD_STACK_GUARD lowering on Linux.
17421 bool PPCTargetLowering::useLoadStackGuardNode() const {
17422 if (!Subtarget.isTargetLinux())
17423 return TargetLowering::useLoadStackGuardNode();
17424 return true;
17425}
17426
17427// Override to disable global variable loading on Linux and insert AIX canary
17428// word declaration.
17429 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17430 if (Subtarget.isAIXABI()) {
17431 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17432 Type::getInt8PtrTy(M.getContext()));
17433 return;
17434 }
17435 if (!Subtarget.isTargetLinux())
17436 TargetLowering::insertSSPDeclarations(M);
17437}
17438
17439 Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17440 if (Subtarget.isAIXABI())
17441 return M.getGlobalVariable(AIXSSPCanaryWordName);
17442 return TargetLowering::getSDagStackGuard(M);
17443}
17444
17445 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17446 bool ForCodeSize) const {
17447 if (!VT.isSimple() || !Subtarget.hasVSX())
17448 return false;
17449
17450 switch(VT.getSimpleVT().SimpleTy) {
17451 default:
17452 // For FP types that are currently not supported by PPC backend, return
17453 // false. Examples: f16, f80.
17454 return false;
17455 case MVT::f32:
17456 case MVT::f64: {
17457 if (Subtarget.hasPrefixInstrs()) {
17458 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17459 return true;
17460 }
17461 bool IsExact;
17462 APSInt IntResult(16, false);
17463 // The rounding mode doesn't really matter because we only care about floats
17464 // that can be converted to integers exactly.
17465 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17466 // For exact values in the range [-16, 15] we can materialize the float.
17467 if (IsExact && IntResult <= 15 && IntResult >= -16)
17468 return true;
17469 return Imm.isZero();
17470 }
17471 case MVT::ppcf128:
17472 return Imm.isPosZero();
17473 }
17474}
17475
17476// For vector shift operation op, fold
17477// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17478 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17479 SelectionDAG &DAG) {
17480 SDValue N0 = N->getOperand(0);
17481 SDValue N1 = N->getOperand(1);
17482 EVT VT = N0.getValueType();
17483 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17484 unsigned Opcode = N->getOpcode();
17485 unsigned TargetOpcode;
17486
17487 switch (Opcode) {
17488 default:
17489 llvm_unreachable("Unexpected shift operation");
17490 case ISD::SHL:
17491 TargetOpcode = PPCISD::SHL;
17492 break;
17493 case ISD::SRL:
17494 TargetOpcode = PPCISD::SRL;
17495 break;
17496 case ISD::SRA:
17497 TargetOpcode = PPCISD::SRA;
17498 break;
17499 }
17500
17501 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17502 N1->getOpcode() == ISD::AND)
17503 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17504 if (Mask->getZExtValue() == OpSizeInBits - 1)
17505 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17506
17507 return SDValue();
17508}
17509
17510SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17511 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17512 return Value;
17513
17514 SDValue N0 = N->getOperand(0);
17515 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17516 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17517 N0.getOpcode() != ISD::SIGN_EXTEND ||
17518 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17519 N->getValueType(0) != MVT::i64)
17520 return SDValue();
17521
17522 // We can't save an operation here if the value is already extended, and
17523 // the existing shift is easier to combine.
17524 SDValue ExtsSrc = N0.getOperand(0);
17525 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17526 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17527 return SDValue();
17528
17529 SDLoc DL(N0);
17530 SDValue ShiftBy = SDValue(CN1, 0);
17531 // We want the shift amount to be i32 on the extswli, but the shift could
17532 // have an i64.
17533 if (ShiftBy.getValueType() == MVT::i64)
17534 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17535
17536 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17537 ShiftBy);
17538}
17539
17540SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17541 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17542 return Value;
17543
17544 return SDValue();
17545}
17546
17547SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17548 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17549 return Value;
17550
17551 return SDValue();
17552}
17553
17554// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17555// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17556// When C is zero, the equation (addi Z, -C) can be simplified to Z
17557// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17558 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17559 const PPCSubtarget &Subtarget) {
17560 if (!Subtarget.isPPC64())
17561 return SDValue();
17562
17563 SDValue LHS = N->getOperand(0);
17564 SDValue RHS = N->getOperand(1);
17565
17566 auto isZextOfCompareWithConstant = [](SDValue Op) {
17567 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17568 Op.getValueType() != MVT::i64)
17569 return false;
17570
17571 SDValue Cmp = Op.getOperand(0);
17572 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17573 Cmp.getOperand(0).getValueType() != MVT::i64)
17574 return false;
17575
17576 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17577 int64_t NegConstant = 0 - Constant->getSExtValue();
17578 // Due to the limitations of the addi instruction,
17579 // -C is required to be [-32768, 32767].
17580 return isInt<16>(NegConstant);
17581 }
17582
17583 return false;
17584 };
17585
17586 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17587 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17588
17589 // If there is a pattern, canonicalize a zext operand to the RHS.
17590 if (LHSHasPattern && !RHSHasPattern)
17591 std::swap(LHS, RHS);
17592 else if (!LHSHasPattern && !RHSHasPattern)
17593 return SDValue();
17594
17595 SDLoc DL(N);
17596 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17597 SDValue Cmp = RHS.getOperand(0);
17598 SDValue Z = Cmp.getOperand(0);
17599 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17600 int64_t NegConstant = 0 - Constant->getSExtValue();
17601
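// Carry trick: 'addic Z, -1' produces a carry exactly when Z != 0 (only
// 0 + (-1) fails to carry), while 'subfic Z, 0' (i.e. 0 - Z) produces a
// carry exactly when Z == 0, so the addze below adds precisely zext(setcc).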
17602 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17603 default: break;
17604 case ISD::SETNE: {
17605 // when C == 0
17606 // --> addze X, (addic Z, -1).carry
17607 // /
17608 // add X, (zext(setne Z, C))--
17609 // \ when -32768 <= -C <= 32767 && C != 0
17610 // --> addze X, (addic (addi Z, -C), -1).carry
17611 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17612 DAG.getConstant(NegConstant, DL, MVT::i64));
17613 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17614 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17615 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17616 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17617 SDValue(Addc.getNode(), 1));
17618 }
17619 case ISD::SETEQ: {
17620 // when C == 0
17621 // --> addze X, (subfic Z, 0).carry
17622 // /
17623 // add X, (zext(sete Z, C))--
17624 // \ when -32768 <= -C <= 32767 && C != 0
17625 // --> addze X, (subfic (addi Z, -C), 0).carry
17626 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17627 DAG.getConstant(NegConstant, DL, MVT::i64));
17628 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17629 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17630 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17631 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17632 SDValue(Subc.getNode(), 1));
17633 }
17634 }
17635
17636 return SDValue();
17637}
17638
17639// Transform
17640// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17641// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17642// In this case both C1 and C2 must be known constants.
17643// C1+C2 must fit into a 34 bit signed integer.
17644 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17645 const PPCSubtarget &Subtarget) {
17646 if (!Subtarget.isUsingPCRelativeCalls())
17647 return SDValue();
17648
17649 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17650 // If we find that node try to cast the Global Address and the Constant.
17651 SDValue LHS = N->getOperand(0);
17652 SDValue RHS = N->getOperand(1);
17653
17654 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17655 std::swap(LHS, RHS);
17656
17657 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17658 return SDValue();
17659
17660 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17661 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17662 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
17663
17664 // Check that both casts succeeded.
17665 if (!GSDN || !ConstNode)
17666 return SDValue();
17667
17668 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17669 SDLoc DL(GSDN);
17670
17671 // The signed int offset needs to fit in 34 bits.
17672 if (!isInt<34>(NewOffset))
17673 return SDValue();
17674
17675 // The new global address is a copy of the old global address except
17676 // that it has the updated Offset.
17677 SDValue GA =
17678 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17679 NewOffset, GSDN->getTargetFlags());
17680 SDValue MatPCRel =
17681 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17682 return MatPCRel;
17683}
17684
17685SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17686 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17687 return Value;
17688
17689 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17690 return Value;
17691
17692 return SDValue();
17693}
17694
17695// Detect TRUNCATE operations on bitcasts of float128 values.
17696 // What we are looking for here is the situation where we extract a subset
17697// of bits from a 128 bit float.
17698// This can be of two forms:
17699// 1) BITCAST of f128 feeding TRUNCATE
17700// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17701// The reason this is required is because we do not have a legal i128 type
17702// and so we want to prevent having to store the f128 and then reload part
17703// of it.
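// For example, extracting the upper 64 bits of an f128 (say, to inspect the
// sign and exponent) becomes a v2i64 bitcast plus one EXTRACT_VECTOR_ELT
// below, rather than a store of the f128 followed by a 64-bit reload.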
17704SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17705 DAGCombinerInfo &DCI) const {
17706 // If we are using CRBits then try that first.
17707 if (Subtarget.useCRBits()) {
17708 // Check if CRBits did anything and return that if it did.
17709 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17710 return CRTruncValue;
17711 }
17712
17713 SDLoc dl(N);
17714 SDValue Op0 = N->getOperand(0);
17715
17716 // Looking for a truncate of i128 to i64.
17717 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17718 return SDValue();
17719
17720 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17721
17722 // SRL feeding TRUNCATE.
17723 if (Op0.getOpcode() == ISD::SRL) {
17724 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17725 // The right shift has to be by 64 bits.
17726 if (!ConstNode || ConstNode->getZExtValue() != 64)
17727 return SDValue();
17728
17729 // Switch the element number to extract.
17730 EltToExtract = EltToExtract ? 0 : 1;
17731 // Update Op0 past the SRL.
17732 Op0 = Op0.getOperand(0);
17733 }
17734
17735 // BITCAST feeding a TRUNCATE possibly via SRL.
17736 if (Op0.getOpcode() == ISD::BITCAST &&
17737 Op0.getValueType() == MVT::i128 &&
17738 Op0.getOperand(0).getValueType() == MVT::f128) {
17739 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17740 return DCI.DAG.getNode(
17741 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
17742 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17743 }
17744 return SDValue();
17745}
17746
17747SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17748 SelectionDAG &DAG = DCI.DAG;
17749
17750 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17751 if (!ConstOpOrElement)
17752 return SDValue();
17753
17754 // An imul is usually smaller than the alternative sequence for legal type.
17755 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17756 isOperationLegal(ISD::MUL, N->getValueType(0)))
17757 return SDValue();
17758
17759 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17760 switch (this->Subtarget.getCPUDirective()) {
17761 default:
17762 // TODO: enhance the condition for subtarget before pwr8
17763 return false;
17764 case PPC::DIR_PWR8:
17765 // type mul add shl
17766 // scalar 4 1 1
17767 // vector 7 2 2
17768 return true;
17769 case PPC::DIR_PWR9:
17770 case PPC::DIR_PWR10:
17771 case PPC::DIR_PWR_FUTURE:
17772 // type mul add shl
17773 // scalar 5 2 2
17774 // vector 7 2 2
17775
17776 // The cycle ratios of the related operations are shown in the table above.
17777 // Because mul costs 5 (scalar) or 7 (vector) cycles while add/sub/shl all
17778 // cost 2 for both scalar and vector types, the two-instruction patterns
17779 // (add/sub + shl, cost 4) are always profitable; but the three-instruction
17780 // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add +
17781 // shl), so we should only do it for vector types.
17782 return IsAddOne && IsNeg ? VT.isVector() : true;
17783 }
17784 };
17785
17786 EVT VT = N->getValueType(0);
17787 SDLoc DL(N);
17788
17789 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17790 bool IsNeg = MulAmt.isNegative();
17791 APInt MulAmtAbs = MulAmt.abs();
17792
17793 if ((MulAmtAbs - 1).isPowerOf2()) {
17794 // (mul x, 2^N + 1) => (add (shl x, N), x)
17795 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17796
17797 if (!IsProfitable(IsNeg, true, VT))
17798 return SDValue();
17799
17800 SDValue Op0 = N->getOperand(0);
17801 SDValue Op1 =
17802 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17803 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17804 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17805
17806 if (!IsNeg)
17807 return Res;
17808
17809 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17810 } else if ((MulAmtAbs + 1).isPowerOf2()) {
17811 // (mul x, 2^N - 1) => (sub (shl x, N), x)
17812 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17813
17814 if (!IsProfitable(IsNeg, false, VT))
17815 return SDValue();
17816
17817 SDValue Op0 = N->getOperand(0);
17818 SDValue Op1 =
17819 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17820 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17821
17822 if (!IsNeg)
17823 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17824 else
17825 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17826
17827 } else {
17828 return SDValue();
17829 }
17830}
17831
17832// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
17833// in combiner since we need to check SD flags and other subtarget features.
17834SDValue PPCTargetLowering::combineFMALike(SDNode *N,
17835 DAGCombinerInfo &DCI) const {
17836 SDValue N0 = N->getOperand(0);
17837 SDValue N1 = N->getOperand(1);
17838 SDValue N2 = N->getOperand(2);
17839 SDNodeFlags Flags = N->getFlags();
17840 EVT VT = N->getValueType(0);
17841 SelectionDAG &DAG = DCI.DAG;
17842 const TargetOptions &Options = getTargetMachine().Options;
17843 unsigned Opc = N->getOpcode();
17844 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
17845 bool LegalOps = !DCI.isBeforeLegalizeOps();
17846 SDLoc Loc(N);
17847
17848 if (!isOperationLegal(ISD::FMA, VT))
17849 return SDValue();
17850
17851 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
17852 // since (fnmsub a b c)=-0 while c-ab=+0.
17853 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
17854 return SDValue();
17855
17856 // (fma (fneg a) b c) => (fnmsub a b c)
17857 // (fnmsub (fneg a) b c) => (fma a b c)
17858 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
17859 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
17860
17861 // (fma a (fneg b) c) => (fnmsub a b c)
17862 // (fnmsub a (fneg b) c) => (fma a b c)
17863 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
17864 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
17865
17866 return SDValue();
17867}
17868
17869bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17870 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
17871 if (!Subtarget.is64BitELFABI())
17872 return false;
17873
17874 // If not a tail call then no need to proceed.
17875 if (!CI->isTailCall())
17876 return false;
17877
17878 // If sibling calls have been disabled and tail-calls aren't guaranteed
17879 // there is no reason to duplicate.
17880 auto &TM = getTargetMachine();
17881 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17882 return false;
17883
17884 // Can't tail call a function called indirectly, or if it has variadic args.
17885 const Function *Callee = CI->getCalledFunction();
17886 if (!Callee || Callee->isVarArg())
17887 return false;
17888
17889 // Make sure the callee and caller calling conventions are eligible for tco.
17890 const Function *Caller = CI->getParent()->getParent();
17891 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17892 CI->getCallingConv()))
17893 return false;
17894
17895 // If the function is local then we have a good chance at tail-calling it
17896 return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17897}
17898
17899bool PPCTargetLowering::
17900isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17901 const Value *Mask = AndI.getOperand(1);
17902 // If the mask is suitable for andi. or andis. we should sink the and.
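// andi. takes a 16-bit unsigned immediate and andis. applies the immediate
// shifted left by 16 bits; the two checks below accept exactly those masks.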
17903 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17904 // Can't handle constants wider than 64-bits.
17905 if (CI->getBitWidth() > 64)
17906 return false;
17907 int64_t ConstVal = CI->getZExtValue();
17908 return isUInt<16>(ConstVal) ||
17909 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17910 }
17911
17912 // For non-constant masks, we can always use the record-form and.
17913 return true;
17914}
17915
17916/// getAddrModeForFlags - Based on the set of address flags, select the most
17917/// optimal instruction format to match by.
17918PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17919 // This is not a node we should be handling here.
17920 if (Flags == PPC::MOF_None)
17921 return PPC::AM_None;
17922 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
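// Each entry in AddrModesMap is a set of flags that fully describes one
// variant of the mode; a mode matches when every bit of some set is present
// in Flags, i.e. (Flags & FlagSet) == FlagSet.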
17923 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17924 if ((Flags & FlagSet) == FlagSet)
17925 return PPC::AM_DForm;
17926 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17927 if ((Flags & FlagSet) == FlagSet)
17928 return PPC::AM_DSForm;
17929 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17930 if ((Flags & FlagSet) == FlagSet)
17931 return PPC::AM_DQForm;
17932 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
17933 if ((Flags & FlagSet) == FlagSet)
17934 return PPC::AM_PrefixDForm;
17935 // If no other forms are selected, return an X-Form as it is the most
17936 // general addressing mode.
17937 return PPC::AM_XForm;
17938}
17939
17940/// Set alignment flags based on whether or not the Frame Index is aligned.
17941/// Utilized when computing flags for address computation when selecting
17942/// load and store instructions.
17943static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17944 SelectionDAG &DAG) {
17945 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17946 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17947 if (!FI)
17948 return;
17949 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17950 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17951 // If this is (add $FI, $S16Imm), the alignment flags are already set
17952 // based on the immediate. We just need to clear the alignment flags
17953 // if the FI alignment is weaker.
17954 if ((FrameIndexAlign % 4) != 0)
17955 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17956 if ((FrameIndexAlign % 16) != 0)
17957 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17958 // If the address is a plain FrameIndex, set alignment flags based on
17959 // FI alignment.
17960 if (!IsAdd) {
17961 if ((FrameIndexAlign % 4) == 0)
17962 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17963 if ((FrameIndexAlign % 16) == 0)
17964 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17965 }
17966}
17967
17968/// Given a node, compute flags that are used for address computation when
17969/// selecting load and store instructions. The flags computed are stored in
17970/// FlagSet. This function takes into account whether the node is a constant,
17971 /// an ADD, an OR, or neither, and computes the address flags accordingly.
17972 static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
17973 SelectionDAG &DAG) {
17974 // Set the alignment flags for the node depending on if the node is
17975 // 4-byte or 16-byte aligned.
17976 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
17977 if ((Imm & 0x3) == 0)
17978 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17979 if ((Imm & 0xf) == 0)
17980 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17981 };
17982
17983 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
17984 // All 32-bit constants can be computed as LIS + Disp.
17985 const APInt &ConstImm = CN->getAPIntValue();
17986 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
17987 FlagSet |= PPC::MOF_AddrIsSImm32;
17988 SetAlignFlagsForImm(ConstImm.getZExtValue());
17989 setAlignFlagsForFI(N, FlagSet, DAG);
17990 }
17991 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
17992 FlagSet |= PPC::MOF_RPlusSImm34;
17993 else // Let constant materialization handle large constants.
17994 FlagSet |= PPC::MOF_NotAddNorCst;
17995 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
17996 // This address can be represented as an addition of:
17997 // - Register + Imm16 (possibly a multiple of 4/16)
17998 // - Register + Imm34
17999 // - Register + PPCISD::Lo
18000 // - Register + Register
18001 // In any case, we won't have to match this as Base + Zero.
18002 SDValue RHS = N.getOperand(1);
18003 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18004 const APInt &ConstImm = CN->getAPIntValue();
18005 if (ConstImm.isSignedIntN(16)) {
18006 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18007 SetAlignFlagsForImm(ConstImm.getZExtValue());
18008 setAlignFlagsForFI(N, FlagSet, DAG);
18009 }
18010 if (ConstImm.isSignedIntN(34))
18011 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18012 else
18013 FlagSet |= PPC::MOF_RPlusR; // Register.
18014 } else if (RHS.getOpcode() == PPCISD::Lo &&
18015 !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
18016 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18017 else
18018 FlagSet |= PPC::MOF_RPlusR; // Register.
18019 } else { // The address computation is not a constant or an addition.
18020 setAlignFlagsForFI(N, FlagSet, DAG);
18021 FlagSet |= PPC::MOF_NotAddNorCst;
18022 }
18023}
18024
18025 static bool isPCRelNode(SDValue N) {
18026 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18027 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18028 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18029 isValidPCRelNode<JumpTableSDNode>(N) ||
18030 isValidPCRelNode<BlockAddressSDNode>(N));
18031 }
18032
18033 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18034/// the address flags of the load/store instruction that is to be matched.
18035unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18036 SelectionDAG &DAG) const {
18037 unsigned FlagSet = PPC::MOF_None;
18038
18039 // Compute subtarget flags.
18040 if (!Subtarget.hasP9Vector())
18041 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18042 else {
18043 FlagSet |= PPC::MOF_SubtargetP9;
18044 if (Subtarget.hasPrefixInstrs())
18045 FlagSet |= PPC::MOF_SubtargetP10;
18046 }
18047 if (Subtarget.hasSPE())
18048 FlagSet |= PPC::MOF_SubtargetSPE;
18049
18050 // Check if we have a PCRel node and return early.
18051 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18052 return FlagSet;
18053
18054 // If the node is the paired load/store intrinsics, compute flags for
18055 // address computation and return early.
18056 unsigned ParentOp = Parent->getOpcode();
18057 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18058 (ParentOp == ISD::INTRINSIC_VOID))) {
18059 unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
18060 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18061 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18062 ? Parent->getOperand(2)
18063 : Parent->getOperand(3);
18066 return FlagSet;
18067 }
18068 }
18069
18070 // Mark this as something we don't want to handle here if it is an atomic
18071 // or pre-increment instruction.
18072 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18073 if (LSB->isIndexed())
18074 return PPC::MOF_None;
18075
18076 // Compute in-memory type flags, based on whether the access is a scalar
18077 // integer, a floating-point scalar, or a vector.
18078 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18079 assert(MN && "Parent should be a MemSDNode!");
18080 EVT MemVT = MN->getMemoryVT();
18081 unsigned Size = MemVT.getSizeInBits();
18082 if (MemVT.isScalarInteger()) {
18083 assert(Size <= 128 &&
18084 "Not expecting scalar integers larger than 16 bytes!");
18085 if (Size < 32)
18086 FlagSet |= PPC::MOF_SubWordInt;
18087 else if (Size == 32)
18088 FlagSet |= PPC::MOF_WordInt;
18089 else
18090 FlagSet |= PPC::MOF_DoubleWordInt;
18091 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18092 if (Size == 128)
18093 FlagSet |= PPC::MOF_Vector;
18094 else if (Size == 256) {
18095 assert(Subtarget.pairedVectorMemops() &&
18096 "256-bit vectors are only available when paired vector memops is "
18097 "enabled!");
18098 FlagSet |= PPC::MOF_Vector256;
18099 } else
18100 llvm_unreachable("Not expecting illegal vectors!");
18101 } else { // Floating point type: can be scalar, f128 or vector types.
18102 if (Size == 32 || Size == 64)
18103 FlagSet |= PPC::MOF_ScalarFloat;
18104 else if (MemVT == MVT::f128 || MemVT.isVector())
18105 FlagSet |= PPC::MOF_Vector;
18106 else
18107 llvm_unreachable("Not expecting illegal scalar floats!");
18108 }
18109
18110 // Compute flags for address computation.
18111 computeFlagsForAddressComputation(N, FlagSet, DAG);
18112
18113 // Compute type extension flags.
18114 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18115 switch (LN->getExtensionType()) {
18116 case ISD::SEXTLOAD:
18117 FlagSet |= PPC::MOF_SExt;
18118 break;
18119 case ISD::EXTLOAD:
18120 case ISD::ZEXTLOAD:
18121 FlagSet |= PPC::MOF_ZExt;
18122 break;
18123 case ISD::NON_EXTLOAD:
18124 FlagSet |= PPC::MOF_NoExt;
18125 break;
18126 }
18127 } else
18128 FlagSet |= PPC::MOF_NoExt;
18129
18130 // For integers, no extension is the same as zero extension.
18131 // We set the extension mode to zero extension so we don't have
18132 // to add separate entries in AddrModesMap for loads and stores.
18133 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18134 FlagSet |= PPC::MOF_ZExt;
18135 FlagSet &= ~PPC::MOF_NoExt;
18136 }
18137
18138 // If we don't have prefixed instructions, 34-bit constants should be
18139 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18140 bool IsNonP1034BitConst =
18141 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18142 FlagSet) == PPC::MOF_RPlusSImm34;
18143 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18144 IsNonP1034BitConst)
18145 FlagSet |= PPC::MOF_NotAddNorCst;
18146
18147 return FlagSet;
18148}
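// Editorial example, not part of the LLVM source: for a plain load such as
//   %v = load i64, ptr %p
// on a Power9 subtarget, the returned set is roughly
//   MOF_SubtargetP9 | MOF_DoubleWordInt | MOF_ZExt | <address flags for %p>,
// where MOF_NoExt has been canonicalized to MOF_ZExt so that loads and
// stores of the same integer type share one AddrModesMap entry.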
18149
18150/// SelectForceXFormMode - Given the specified address, force it to be
18151/// represented as an indexed [r+r] operation (an XForm instruction).
18152PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18153 SDValue &Base,
18154 SelectionDAG &DAG) const {
18155
18156 PPC::AddrMode Mode = PPC::AM_XForm;
18157 int16_t ForceXFormImm = 0;
18158 if (provablyDisjointOr(DAG, N) &&
18159 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18160 Disp = N.getOperand(0);
18161 Base = N.getOperand(1);
18162 return Mode;
18163 }
18164
18165 // If the address is the result of an add, we will utilize the fact that the
18166 // address calculation includes an implicit add. However, we can reduce
18167 // register pressure if we do not materialize a constant just for use as the
18168 // index register. We therefore fold the add unless it is an add of a 16-bit
18169 // signed constant where both operands have only a single use.
18170 if (N.getOpcode() == ISD::ADD &&
18171 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18172 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18173 Disp = N.getOperand(0);
18174 Base = N.getOperand(1);
18175 return Mode;
18176 }
18177
18178 // Otherwise, use R0 as the base register.
18179 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18180 N.getValueType());
18181 Base = N;
18182
18183 return Mode;
18184}
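// Editorial example, not part of the LLVM source: for N = (add %a, %b) where
// %b is not a 16-bit signed immediate, the code above returns Disp = %a and
// Base = %b, so the implicit add in the X-Form load/store replaces the
// explicit ADD node; otherwise the zero register supplies one operand and
// the whole computation N becomes the other.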
18185
18186bool PPCTargetLowering::splitValueIntoRegisterParts(
18187 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18188 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18189 EVT ValVT = Val.getValueType();
18190 // If we are splitting a scalar integer into f64 parts (i.e. so they
18191 // can be placed into VFRC registers), we need to zero extend and
18192 // bitcast the values. This will ensure the value is placed into a
18193 // VSR using direct moves or stack operations as needed.
18194 if (PartVT == MVT::f64 &&
18195 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18196 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18197 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18198 Parts[0] = Val;
18199 return true;
18200 }
18201 return false;
18202}
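// Editorial sketch, not part of the LLVM source: in DAG terms, an i32 value
// that must live in an f64 part is not split at all but widened and
// reinterpreted, i.e.
//   Parts[0] = (f64 (bitcast (i64 (zero_extend i32:%val))))
// so a single f64 register part carries the raw integer bits.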
18203
18204SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18205 SelectionDAG &DAG) const {
18206 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18207 TargetLowering::CallLoweringInfo CLI(DAG);
18208 EVT RetVT = Op.getValueType();
18209 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18210 SDValue Callee =
18211 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18212 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18213 TargetLowering::ArgListTy Args;
18214 TargetLowering::ArgListEntry Entry;
18215 for (const SDValue &N : Op->op_values()) {
18216 EVT ArgVT = N.getValueType();
18217 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18218 Entry.Node = N;
18219 Entry.Ty = ArgTy;
18220 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18221 Entry.IsZExt = !Entry.IsSExt;
18222 Args.push_back(Entry);
18223 }
18224
18225 SDValue InChain = DAG.getEntryNode();
18226 SDValue TCChain = InChain;
18227 const Function &F = DAG.getMachineFunction().getFunction();
18228 bool isTailCall =
18229 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18230 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18231 if (isTailCall)
18232 InChain = TCChain;
18233 CLI.setDebugLoc(SDLoc(Op))
18234 .setChain(InChain)
18235 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18236 .setTailCall(isTailCall)
18237 .setSExtResult(SignExtend)
18238 .setZExtResult(!SignExtend)
18239 .setIsPostTypeLegalization(true);
18240 return TLI.LowerCallTo(CLI).first;
18241}
18242
18243SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18244 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18245 SelectionDAG &DAG) const {
18246 if (Op.getValueType() == MVT::f32)
18247 return lowerToLibCall(LibCallFloatName, Op, DAG);
18248
18249 if (Op.getValueType() == MVT::f64)
18250 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18251
18252 return SDValue();
18253}
18254
18255bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18256 SDNodeFlags Flags = Op.getNode()->getFlags();
18257 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18258 Flags.hasNoNaNs() && Flags.hasNoInfs();
18259}
18260
18261bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18262 return Op.getNode()->getFlags().hasApproximateFuncs();
18263}
18264
18265bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18266 return getTargetMachine().Options.PPCGenScalarMASSEntries;
18267}
18268
18269SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18270 const char *LibCallFloatName,
18271 const char *LibCallDoubleNameFinite,
18272 const char *LibCallFloatNameFinite,
18273 SDValue Op,
18274 SelectionDAG &DAG) const {
18275 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18276 return SDValue();
18277
18278 if (!isLowringToMASSFiniteSafe(Op))
18279 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18280 DAG);
18281
18282 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18283 LibCallDoubleNameFinite, Op, DAG);
18284}
18285
18286SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18287 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18288 "__xl_powf_finite", Op, DAG);
18289}
18290
18291SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18292 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18293 "__xl_sinf_finite", Op, DAG);
18294}
18295
18296SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18297 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18298 "__xl_cosf_finite", Op, DAG);
18299}
18300
18301SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18302 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18303 "__xl_logf_finite", Op, DAG);
18304}
18305
18306SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18307 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18308 "__xl_log10f_finite", Op, DAG);
18309}
18310
18311SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18312 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18313 "__xl_expf_finite", Op, DAG);
18314}
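// Editorial usage note, not part of the LLVM source: these hooks only fire
// when scalar MASS conversion is enabled in TargetOptions and the call
// carries the afn fast-math flag, e.g.
//   %r = call afn double @llvm.pow.f64(double %x, double %y)
// becomes a call to __xl_pow, and to __xl_pow_finite when nnan, ninf and nsz
// also hold; a strict call keeps the ordinary libm lowering.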
18315
18316// If we happen to match an alignment-constrained DS- or DQ-Form, check that
18317// the Frame Index is adequately aligned. If it is not, reset the mode to X-Form.
18318static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18319 PPC::AddrMode &Mode) {
18320 if (!isa<FrameIndexSDNode>(N))
18321 return;
18322 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18323 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18324 Mode = PPC::AM_XForm;
18325}
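// Editorial example, not part of the LLVM source: DS-Form displacements must
// be a multiple of 4 and DQ-Form displacements a multiple of 16, so a frame
// object with only 2-byte alignment that was tentatively matched as
// AM_DSForm (MOF_RPlusSImm16Mult4 cleared by setAlignFlagsForFI) is demoted
// here to AM_XForm and selected as an indexed load/store instead.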
18326
18327/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18328/// compute the address flags of the node, get the optimal address mode based
18329/// on the flags, and set the Base and Disp based on the address mode.
18330PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18331 SDValue N, SDValue &Disp,
18332 SDValue &Base,
18333 SelectionDAG &DAG,
18334 MaybeAlign Align) const {
18335 SDLoc DL(Parent);
18336
18337 // Compute the address flags.
18338 unsigned Flags = computeMOFlags(Parent, N, DAG);
18339
18340 // Get the optimal address mode based on the Flags.
18341 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18342
18343 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18344 // Select an X-Form load if it is not.
18345 setXFormForUnalignedFI(N, Flags, Mode);
18346
18347 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18348 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18349 assert(Subtarget.isUsingPCRelativeCalls() &&
18350 "Must be using PC-Relative calls when a valid PC-Relative node is "
18351 "present!");
18352 Mode = PPC::AM_PCRel;
18353 }
18354
18355 // Set Base and Disp accordingly depending on the address mode.
18356 switch (Mode) {
18357 case PPC::AM_DForm:
18358 case PPC::AM_DSForm:
18359 case PPC::AM_DQForm: {
18360 // This is a register plus a 16-bit immediate. The base will be the
18361 // register and the displacement will be the immediate, provided the
18362 // immediate is sufficiently aligned.
18363 if (Flags & PPC::MOF_RPlusSImm16) {
18364 SDValue Op0 = N.getOperand(0);
18365 SDValue Op1 = N.getOperand(1);
18366 int16_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
18367 if (!Align || isAligned(*Align, Imm)) {
18368 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18369 Base = Op0;
18370 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18371 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18372 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18373 }
18374 break;
18375 }
18376 }
18377 // This is a register plus the @lo relocation. The base is the register
18378 // and the displacement is the global address.
18379 else if (Flags & PPC::MOF_RPlusLo) {
18380 Disp = N.getOperand(1).getOperand(0); // The global address.
18381 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18382 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18383 Disp.getOpcode() == ISD::TargetConstantPool ||
18384 Disp.getOpcode() == ISD::TargetJumpTable);
18385 Base = N.getOperand(0);
18386 break;
18387 }
18388 // This is a constant address of at most 32 bits. The base will be
18389 // zero or load-immediate-shifted and the displacement will be
18390 // the low 16 bits of the address.
18391 else if (Flags & PPC::MOF_AddrIsSImm32) {
18392 auto *CN = cast<ConstantSDNode>(N);
18393 EVT CNType = CN->getValueType(0);
18394 uint64_t CNImm = CN->getZExtValue();
18395 // If this address fits entirely in a 16-bit sext immediate field, codegen
18396 // this as "d, 0".
18397 int16_t Imm;
18398 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18399 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18400 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18401 CNType);
18402 break;
18403 }
18404 // Handle 32-bit sext immediate with LIS + Addr mode.
18405 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18406 (!Align || isAligned(*Align, CNImm))) {
18407 int32_t Addr = (int32_t)CNImm;
18408 // Otherwise, break this down into LIS + Disp.
18409 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18410 Base =
18411 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18412 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18413 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18414 break;
18415 }
18416 }
18417 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is non-foldable.
18418 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18419 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18420 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18421 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18422 } else
18423 Base = N;
18424 break;
18425 }
18426 case PPC::AM_PrefixDForm: {
18427 int64_t Imm34 = 0;
18428 unsigned Opcode = N.getOpcode();
18429 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18430 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18431 // N is an Add/OR Node, and its second operand is a 34-bit signed immediate.
18432 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18433 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18434 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18435 else
18436 Base = N.getOperand(0);
18437 } else if (isIntS34Immediate(N, Imm34)) {
18438 // The address is a 34-bit signed immediate.
18439 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18440 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18441 }
18442 break;
18443 }
18444 case PPC::AM_PCRel: {
18445 // When selecting PC-Relative instructions, "Base" is not utilized as
18446 // we select the address as [PC+imm].
18447 Disp = N;
18448 break;
18449 }
18450 case PPC::AM_None:
18451 break;
18452 default: { // By default, X-Form is always available to be selected.
18453 // When a frame index is not aligned, we also match by XForm.
18454 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18455 Base = FI ? N : N.getOperand(1);
18456 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18457 N.getValueType())
18458 : N.getOperand(0);
18459 break;
18460 }
18461 }
18462 return Mode;
18463}
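// Editorial worked example, not part of the LLVM source: for
//   %v = load i64, ptr %q        ; where %q = (add %base, 40)
// the flags computed above select AM_DSForm (double-word integer, and 40 is
// a multiple of 4), and the D/DS/DQ case then yields Base = %base and
// Disp = TargetConstant<40>, which instruction selection can match to LD.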
18464
18465CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18466 bool Return,
18467 bool IsVarArg) const {
18468 switch (CC) {
18469 case CallingConv::Cold:
18470 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18471 default:
18472 return CC_PPC64_ELF;
18473 }
18474}
18475
18476bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18477 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18478}
18479
18480TargetLowering::AtomicExpansionKind
18481PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18482 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18483 if (shouldInlineQuadwordAtomics() && Size == 128)
18484 return AtomicExpansionKind::MaskedIntrinsic;
18485
18486 switch (AI->getOperation()) {
18487 case AtomicRMWInst::UIncWrap:
18488 case AtomicRMWInst::UDecWrap:
18489 return AtomicExpansionKind::CmpXChg;
18490 default:
18491 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18492 }
18493
18494 llvm_unreachable("unreachable atomicrmw operation");
18495}
18496
18497TargetLowering::AtomicExpansionKind
18498PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18499 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18500 if (shouldInlineQuadwordAtomics() && Size == 128)
18501 return AtomicExpansionKind::MaskedIntrinsic;
18502 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18503}
18504
18505static Intrinsic::ID
18506getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18507 switch (BinOp) {
18508 default:
18509 llvm_unreachable("Unexpected AtomicRMW BinOp");
18510 case AtomicRMWInst::Xchg:
18511 return Intrinsic::ppc_atomicrmw_xchg_i128;
18512 case AtomicRMWInst::Add:
18513 return Intrinsic::ppc_atomicrmw_add_i128;
18514 case AtomicRMWInst::Sub:
18515 return Intrinsic::ppc_atomicrmw_sub_i128;
18516 case AtomicRMWInst::And:
18517 return Intrinsic::ppc_atomicrmw_and_i128;
18518 case AtomicRMWInst::Or:
18519 return Intrinsic::ppc_atomicrmw_or_i128;
18520 case AtomicRMWInst::Xor:
18521 return Intrinsic::ppc_atomicrmw_xor_i128;
18522 case AtomicRMWInst::Nand:
18523 return Intrinsic::ppc_atomicrmw_nand_i128;
18524 }
18525}
18526
18527Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18528 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18529 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18530 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18531 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18532 Type *ValTy = Incr->getType();
18533 assert(ValTy->getPrimitiveSizeInBits() == 128);
18534 Function *RMW = Intrinsic::getDeclaration(
18535 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18536 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18537 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18538 Value *IncrHi =
18539 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18540 Value *Addr =
18541 Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18542 Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
18543 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18544 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18545 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18546 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18547 return Builder.CreateOr(
18548 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18549}
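// Editorial sketch of the emitted IR, not part of the LLVM source (shown for
// an i128 atomicrmw add):
//   %incr_lo = trunc i128 %incr to i64
//   %shr     = lshr i128 %incr, 64
//   %incr_hi = trunc i128 %shr to i64
//   %lohi    = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(ptr %p,
//                  i64 %incr_lo, i64 %incr_hi)
// after which the two i64 halves are zero-extended, shifted, and or'ed back
// together to rebuild the i128 result, exactly as the code above does.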
18550
18551Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18552 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18553 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18554 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18555 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18556 Type *ValTy = CmpVal->getType();
18557 assert(ValTy->getPrimitiveSizeInBits() == 128);
18558 Function *IntCmpXchg =
18559 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18560 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18561 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18562 Value *CmpHi =
18563 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18564 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18565 Value *NewHi =
18566 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18567 Value *Addr =
18568 Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18569 emitLeadingFence(Builder, CI, Ord);
18570 Value *LoHi =
18571 Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
18572 emitTrailingFence(Builder, CI, Ord);
18573 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18574 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18575 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18576 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18577 return Builder.CreateOr(
18578 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18579}
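// Editorial note, not part of the LLVM source: the cmpxchg path mirrors the
// atomicrmw path above, splitting both the expected and the replacement
// value into i64 halves for @llvm.ppc.cmpxchg.i128, but additionally
// brackets the call with emitLeadingFence/emitTrailingFence so the requested
// AtomicOrdering is honored around the underlying quadword load-reserve/
// store-conditional sequence.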
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
basic Basic Alias true
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
Given that RA is a live value
#define LLVM_DEBUG(X)
Definition Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Align
uint64_t Addr
uint64_t Size
bool End
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:114
static bool isConstantOrUndef(const SDValue Op)
Module.h This file contains the declarations for the Module class.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(VerifyEach)
const char LLVMTargetMachineRef TM
pre isel intrinsic Pre ISel Intrinsic Lowering
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:191
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:470
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
xray Insert XRay ops
Class for arbitrary precision integers.
Definition APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:212
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:427
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:274
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:112
The address of a basic block.
Definition Constants.h:874
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:260
This is the shared class of boolean and integer constants.
Definition Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
This is an important base class in LLVM.
Definition Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
A debug info location.
Definition DebugLoc.h:33
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:645
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:670
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:678
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:642
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:237
const Function & getFunction() const
Definition Function.h:134
arg_iterator arg_begin()
Definition Function.h:776
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:319
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:644
const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:544
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Module * getParent()
Get the module that this global value is contained inside of...
Type * getValueType() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:94
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition InlineAsm.h:363
static unsigned getKind(unsigned Flags)
Definition InlineAsm.h:351
const BasicBlock * getParent() const
Definition Instruction.h:90
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:388
Metadata node.
Definition Metadata.h:950
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:65
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
bool isAIXABI() const
bool useSoftFloat() const
const PPCFrameLowering * getFrameLowering() const override
bool needsSwapsForVSXMemOps() const
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
const PPCInstrInfo * getInstrInfo() const override
bool isSVR4ABI() const
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
POPCNTDKind hasPOPCNTD() const
bool isLittleEndian() const
bool isTargetLinux() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
bool isPredictableSelectIsExpensive() const
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two adds is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
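For illustration, a hypothetical override that accepts only positive zero (which most FPUs can materialize cheaply); the real PPC answer depends on the subtarget's immediate-materialization facilities:
    bool MyTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
      // Sketch only: +0.0 is the lone natively selectable FP immediate.
      return Imm.isPosZero();
    }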
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
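The shape of such a hook, as a minimal sketch for a hypothetical target (the fold shown is already handled by the generic combiner; it merely stands in for a real target-specific pattern):
    SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
      switch (N->getOpcode()) {
      case ISD::ADD:
        // (add x, 0) --> x
        if (isNullConstant(N->getOperand(1)))
          return N->getOperand(0);
        break;
      default:
        break;
      }
      return SDValue(); // no combine performed
    }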
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation?
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Determine whether the specified address can be represented as PC-relative, i.e. [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is a legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is a legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
static use_iterator use_end()
Represents a use of an SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
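A short sketch of how these SDNode/SDValue accessors are typically used together when inspecting the DAG (V is an assumed in-scope SDValue):
    SDNode *Def = V.getNode();      // defining node of this value
    if (Def->getOpcode() == ISD::ADD && Def->hasOneUse()) {
      SDValue LHS = Def->getOperand(0);
      SDValue RHS = Def->getOperand(1);
      EVT VT = Def->getValueType(0); // type of result 0
      // ... a combine or selection decision would use LHS/RHS/VT here ...
    }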
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
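Putting getSetCC and getSelect together, a minimal helper (not from this file; A and B are assumed i32 values) that emits a signed minimum:
    static SDValue emitSMin(SelectionDAG &DAG, const SDLoc &DL, SDValue A,
                            SDValue B) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      // Ask the target what type a SETCC of i32 produces (i1, i32, ...).
      EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(),
                                        *DAG.getContext(), MVT::i32);
      SDValue Cond = DAG.getSetCC(DL, CCVT, A, B, ISD::SETLT);
      return DAG.getSelect(DL, MVT::i32, Cond, A, B);
    }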
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
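Equivalently, a sketch of spelling getNOT out by hand (DAG, DL, VT, and a value V assumed in scope):
    // getNOT(DL, V, VT) builds the same XOR-with-all-ones node:
    SDValue NotV = DAG.getNode(ISD::XOR, DL, VT, V,
                               DAG.getAllOnesConstant(DL, VT));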
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
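A minimal sketch combining getLoad and getStore to copy one i32 through memory (Chain, SrcPtr, DstPtr, and DL are assumed in scope; alignment is given explicitly):
    SDValue Val = DAG.getLoad(MVT::i32, DL, Chain, SrcPtr,
                              MachinePointerInfo(), Align(4));
    // The loaded value is result 0; the new chain is result 1.
    SDValue NewChain = DAG.getStore(Val.getValue(1), DL, Val, DstPtr,
                                    MachinePointerInfo(), Align(4));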
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
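For example, a sketch that splats the constant 1 across a v4i32 (DAG and DL assumed in scope):
    SDValue One   = DAG.getConstant(1, DL, MVT::i32);
    SDValue Splat = DAG.getSplatBuildVector(MVT::v4i32, DL, One);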
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
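A hedged usage sketch: proving that a 64-bit value already fits in an unsigned 16-bit immediate (DAG and an SDValue Op assumed in scope):
    APInt HighBits = APInt::getHighBitsSet(64, 48); // bits 16..63
    if (DAG.MaskedValueIsZero(Op, HighBits)) {
      // Op is known zero in its upper 48 bits, so it behaves as a
      // zero-extended 16-bit quantity.
    }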
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:36
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:137
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:131
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
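A sketch of how this hook, together with setLoadExtAction, setTruncStoreAction, addRegisterClass, and computeRegisterProperties listed below, is typically called from a hypothetical target's TargetLowering constructor; the register class, Subtarget member, and chosen actions are illustrative only:
    addRegisterClass(MVT::i32, &MyTarget::GPRRegClass); // hypothetical class
    setOperationAction(ISD::SDIV,  MVT::i32, Expand); // no hw divide: expand
    setOperationAction(ISD::CTPOP, MVT::i32, Custom); // route to LowerOperation
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    computeRegisterProperties(Subtarget.getRegisterInfo());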
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:81
static constexpr TypeSize Fixed(ScalarTy ExactSize)
Definition TypeSize.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:250
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:232
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition Type.cpp:301
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:171
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:247
A Use represents the edge between a Value definition and its users.
Definition Use.h:43
Value * getOperand(unsigned i) const
Definition User.h:169
unsigned getNumOperands() const
Definition User.h:191
LLVM Value Representation.
Definition Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
User * user_back()
Definition Value.h:407
Implementation for an ilist node.
Definition ilist_node.h:40
self_iterator getIterator()
Definition ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:168
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:250
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, a pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:269
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:780
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:199
@ GlobalAddress
Definition ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:787
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:543
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:688
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:255
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition ISDOpcodes.h:909
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:899
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:229
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition ISDOpcodes.h:933
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:411
@ GlobalTLSAddress
Definition ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:620
@ TargetExternalSymbol
Definition ISDOpcodes.h:169
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:167
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition ISDOpcodes.h:981
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition ISDOpcodes.h:924
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:222
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:164
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition ISDOpcodes.h:876
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition ISDOpcodes.h:971
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:777
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:742
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition ISDOpcodes.h:964
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:795
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:674
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:884
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:449
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:442
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:833
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:866
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:423
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:783
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:340
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:165
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:95
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:156
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:141
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:114
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition PPC.h:119
@ MO_TPREL_HA
Definition PPC.h:166
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition PPC.h:106
@ MO_TLS
Definition PPC.h:175
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:133
@ MO_TPREL_LO
Definition PPC.h:165
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:162
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:151
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:128
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition PPC.h:146
@ MO_HA
Definition PPC.h:163
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:110
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - Identical to the target-independent nodes, except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load via memory instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local-exec TLS model on 32-bit AIX, produces a call to ....
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
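A hedged sketch of the core check, written over a plain 16-entry byte mask rather than a ShuffleVectorSDNode (the real routine also handles undef entries and the different ShuffleKinds): a mask matches vsldoi exactly when its entries are consecutive starting at some Shift in [0, 16). vsldoiShift is a hypothetical helper.

// Returns the byte shift amount, or -1 if the mask is not a vsldoi pattern.
int vsldoiShift(const int Mask[16]) {
  int Shift = Mask[0];
  if (Shift < 0 || Shift > 15)
    return -1;
  for (int i = 1; i != 16; ++i)
    if (Mask[i] != Shift + i)   // indices >= 16 select from the second input
      return -1;
  return Shift;
}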
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
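A sketch of the splat property being tested, again over a raw mask and ignoring undef entries (isSplatMask is a hypothetical helper): with EltSize bytes per element, every byte of the 16-byte mask must point into one fixed source element.

bool isSplatMask(const int Mask[16], unsigned EltSize) {
  int Elt = Mask[0] / int(EltSize);   // candidate splatted element
  for (unsigned i = 0; i != 16; ++i)
    if (Mask[i] != Elt * int(EltSize) + int(i % EltSize))
      return false;
  return true;
}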
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ XMC_PR
Program Code.
Definition XCOFF.h:104
@ XTY_ER
External reference.
Definition XCOFF.h:240
initializer< Ty > init(const Ty &Val)
constexpr double e
Definition MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:440
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
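Typical usage of the range wrapper, as a small sketch (allPositive is a hypothetical caller):

#include "llvm/ADT/STLExtras.h"
#include <vector>

bool allPositive(const std::vector<int> &V) {
  return llvm::all_of(V, [](int X) { return X > 0; }); // no begin()/end() plumbing
}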
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
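Usage sketch: isAligned(Align(16), N) holds exactly when N is a multiple of 16 (fits16ByteSlot is a hypothetical caller):

#include "llvm/Support/Alignment.h"
#include <cstdint>

bool fits16ByteSlot(uint64_t SizeInBytes) {
  return llvm::isAligned(llvm::Align(16), SizeInBytes); // true for 0, 16, 32, ...
}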
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:269
int countr_zero(T Val)
Count the number of 0's from the least significant bit upward, stopping at the first 1.
Definition bit.h:179
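These two helpers are commonly paired: for a power of two, the trailing-zero count is its exponent (a sketch; log2OfPow2 is hypothetical):

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

unsigned log2OfPow2(uint64_t V) {
  assert(llvm::isPowerOf2_64(V) && "expected a power of two");
  return llvm::countr_zero(V); // e.g. 8 -> 3, 1024 -> 10
}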
unsigned M1(unsigned Val)
Definition VE.h:468
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:264
bool convertToNonDenormSingle(APInt &ArgAPInt)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:156
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
@ Other
Any other memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
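Usage sketch (paddedFrameSize is a hypothetical caller); alignTo rounds up, so sizes that are already aligned come back unchanged:

#include "llvm/Support/Alignment.h"
#include <cstdint>

uint64_t paddedFrameSize(uint64_t Size) {
  return llvm::alignTo(Size, llvm::Align(16)); // 17 -> 32, 16 -> 16
}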
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1929
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:467
ArrayRef(const T &OneElt) -> ArrayRef< T >
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:433
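Usage sketch for decoding a 16-bit signed immediate field (decodeSImm16 is hypothetical):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

int32_t decodeSImm16(uint32_t RawBits) {
  return llvm::SignExtend32<16>(RawBits); // 0xFFFF -> -1, 0x7FFF -> 32767
}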
constexpr unsigned BitWidth
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
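Sketch: the alignment guaranteed at Base + Offset is the common alignment of the two (alignAtOffset is a hypothetical caller):

#include "llvm/Support/Alignment.h"
#include <cstdint>

llvm::Align alignAtOffset(llvm::Align Base, uint64_t Offset) {
  return llvm::commonAlignment(Base, Offset); // Align(16) at offset 8 -> Align(8)
}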
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:449
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:291
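Usage sketch, mirroring std::bit_floor (largestPow2NotAbove is hypothetical):

#include "llvm/ADT/bit.h"
#include <cstdint>

uint32_t largestPow2NotAbove(uint32_t V) {
  return llvm::bit_floor(V); // 20 -> 16, 16 -> 16, 0 -> 0
}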
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:230
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition APFloat.cpp:252
static constexpr roundingMode rmTowardZero
Definition APFloat.h:234
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:373
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:129
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:73
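A small sketch of constructing and querying an EVT (makeV4I32 is a hypothetical helper):

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

llvm::EVT makeV4I32(llvm::LLVMContext &Ctx) {
  llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
  assert(VT.isVector() && VT.getVectorNumElements() == 4);
  return VT;
}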
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:139
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:351
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:363
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:299
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:359
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:160
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:306
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:311
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:149
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:319
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:144
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:66
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs